xref: /petsc/src/mat/tests/ex302k.kokkos.cxx (revision c0c276a7a9f347b22187dda26ae7d35d5b9ed8a2)
1*c0c276a7Ssdargavi static char help[] = "Testing MatCreateSeqAIJKokkosWithKokkosViews() and building Mat on the device.\n\n";
2*c0c276a7Ssdargavi 
3*c0c276a7Ssdargavi #include <petscvec_kokkos.hpp>
4*c0c276a7Ssdargavi #include <petscdevice.h>
5*c0c276a7Ssdargavi #include <petscmat.h>
6*c0c276a7Ssdargavi #include <petscmat_kokkos.hpp>
7*c0c276a7Ssdargavi #include <Kokkos_Core.hpp>
8*c0c276a7Ssdargavi #include <Kokkos_DualView.hpp>
9*c0c276a7Ssdargavi 
10*c0c276a7Ssdargavi using HostMirrorMemorySpace = Kokkos::DualView<PetscScalar *>::host_mirror_space::memory_space;
11*c0c276a7Ssdargavi 
12*c0c276a7Ssdargavi int main(int argc, char **argv)
13*c0c276a7Ssdargavi {
14*c0c276a7Ssdargavi   Mat             A, B;
15*c0c276a7Ssdargavi   PetscInt        i, j, column, M, N, m, n, m_ab, n_ab;
16*c0c276a7Ssdargavi   PetscInt       *di, *dj, *oi, *oj, nd;
17*c0c276a7Ssdargavi   const PetscInt *garray;
18*c0c276a7Ssdargavi   PetscInt       *garray_h;
19*c0c276a7Ssdargavi   PetscScalar    *oa, *da;
20*c0c276a7Ssdargavi   PetscScalar     value;
21*c0c276a7Ssdargavi   PetscRandom     rctx;
22*c0c276a7Ssdargavi   PetscBool       equal, done;
23*c0c276a7Ssdargavi   Mat             AA, AB;
24*c0c276a7Ssdargavi   PetscMPIInt     size, rank;
25*c0c276a7Ssdargavi 
26*c0c276a7Ssdargavi   // ~~~~~~~~~~~~~~~~~~~~~
27*c0c276a7Ssdargavi   // This test shows the routines needed to build a kokkos matrix without preallocation
28*c0c276a7Ssdargavi   // on the host
29*c0c276a7Ssdargavi   // ~~~~~~~~~~~~~~~~~~~~~
30*c0c276a7Ssdargavi 
31*c0c276a7Ssdargavi   PetscFunctionBeginUser;
32*c0c276a7Ssdargavi   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
33*c0c276a7Ssdargavi   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
34*c0c276a7Ssdargavi   PetscCheck(size > 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Must run with 2 or more processes");
35*c0c276a7Ssdargavi   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
36*c0c276a7Ssdargavi 
37*c0c276a7Ssdargavi   /* Create a mpiaij matrix for checking */
38*c0c276a7Ssdargavi   PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, 5, 5, PETSC_DECIDE, PETSC_DECIDE, 0, NULL, 0, NULL, &A));
39*c0c276a7Ssdargavi   PetscCall(MatSetFromOptions(A));
40*c0c276a7Ssdargavi   PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
41*c0c276a7Ssdargavi   PetscCall(MatSetUp(A));
42*c0c276a7Ssdargavi   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
43*c0c276a7Ssdargavi   PetscCall(PetscRandomSetFromOptions(rctx));
44*c0c276a7Ssdargavi 
45*c0c276a7Ssdargavi   for (i = 5 * rank; i < 5 * rank + 5; i++) {
46*c0c276a7Ssdargavi     for (j = 0; j < 5 * size; j++) {
47*c0c276a7Ssdargavi       PetscCall(PetscRandomGetValue(rctx, &value));
48*c0c276a7Ssdargavi       column = (PetscInt)(5 * size * PetscRealPart(value));
49*c0c276a7Ssdargavi       PetscCall(PetscRandomGetValue(rctx, &value));
50*c0c276a7Ssdargavi       PetscCall(MatSetValues(A, 1, &i, 1, &column, &value, INSERT_VALUES));
51*c0c276a7Ssdargavi     }
52*c0c276a7Ssdargavi   }
53*c0c276a7Ssdargavi   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
54*c0c276a7Ssdargavi   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
55*c0c276a7Ssdargavi 
56*c0c276a7Ssdargavi   PetscCall(MatGetSize(A, &M, &N));
57*c0c276a7Ssdargavi   PetscCall(MatGetLocalSize(A, &m, &n));
58*c0c276a7Ssdargavi   PetscCall(MatMPIAIJGetSeqAIJ(A, &AA, &AB, &garray));
59*c0c276a7Ssdargavi   PetscCall(MatGetRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done));
60*c0c276a7Ssdargavi   PetscCall(MatSeqAIJGetArray(AA, &da));
61*c0c276a7Ssdargavi   PetscCall(MatGetRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done));
62*c0c276a7Ssdargavi   PetscCall(MatSeqAIJGetArray(AB, &oa));
63*c0c276a7Ssdargavi   PetscCall(MatGetSize(AB, &m_ab, &n_ab));
64*c0c276a7Ssdargavi 
65*c0c276a7Ssdargavi   Mat output_mat_local, output_mat_nonlocal;
66*c0c276a7Ssdargavi   // Be careful about scope given the kokkos memory reference counts
67*c0c276a7Ssdargavi   {
68*c0c276a7Ssdargavi     // Local
69*c0c276a7Ssdargavi     Kokkos::View<PetscScalar *> a_local_d;
70*c0c276a7Ssdargavi     Kokkos::View<PetscInt *>    i_local_d;
71*c0c276a7Ssdargavi     Kokkos::View<PetscInt *>    j_local_d;
72*c0c276a7Ssdargavi 
73*c0c276a7Ssdargavi     // Nonlocal
74*c0c276a7Ssdargavi     Kokkos::View<PetscScalar *> a_nonlocal_d;
75*c0c276a7Ssdargavi     Kokkos::View<PetscInt *>    i_nonlocal_d;
76*c0c276a7Ssdargavi     Kokkos::View<PetscInt *>    j_nonlocal_d;
77*c0c276a7Ssdargavi 
78*c0c276a7Ssdargavi     // Create device memory
79*c0c276a7Ssdargavi     PetscCallCXX(a_local_d = Kokkos::View<PetscScalar *>("a_local_d", di[5]));
80*c0c276a7Ssdargavi     PetscCallCXX(i_local_d = Kokkos::View<PetscInt *>("i_local_d", m + 1));
81*c0c276a7Ssdargavi     PetscCallCXX(j_local_d = Kokkos::View<PetscInt *>("j_local_d", di[5]));
82*c0c276a7Ssdargavi 
83*c0c276a7Ssdargavi     // Create non-local device memory
84*c0c276a7Ssdargavi     PetscCallCXX(a_nonlocal_d = Kokkos::View<PetscScalar *>("a_nonlocal_d", oi[5]));
85*c0c276a7Ssdargavi     PetscCallCXX(i_nonlocal_d = Kokkos::View<PetscInt *>("i_nonlocal_d", m + 1));
86*c0c276a7Ssdargavi     PetscCallCXX(j_nonlocal_d = Kokkos::View<PetscInt *>("j_nonlocal_d", oi[5]));
87*c0c276a7Ssdargavi 
88*c0c276a7Ssdargavi     // ~~~~~~~~~~~~~~~~~~~~~
89*c0c276a7Ssdargavi     // Could fill the aij on the device - we're just going to test
90*c0c276a7Ssdargavi     // by copying in the existing host values
91*c0c276a7Ssdargavi     // ~~~~~~~~~~~~~~~~~~~~~
92*c0c276a7Ssdargavi     Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_local_h;
93*c0c276a7Ssdargavi     Kokkos::View<PetscInt *, HostMirrorMemorySpace>    i_local_h;
94*c0c276a7Ssdargavi     Kokkos::View<PetscInt *, HostMirrorMemorySpace>    j_local_h;
95*c0c276a7Ssdargavi     Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_nonlocal_h;
96*c0c276a7Ssdargavi     Kokkos::View<PetscInt *, HostMirrorMemorySpace>    i_nonlocal_h;
97*c0c276a7Ssdargavi     Kokkos::View<PetscInt *, HostMirrorMemorySpace>    j_nonlocal_h;
98*c0c276a7Ssdargavi 
99*c0c276a7Ssdargavi     PetscCallCXX(a_local_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(da, di[5]));
100*c0c276a7Ssdargavi     PetscCallCXX(i_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(di, m + 1));
101*c0c276a7Ssdargavi     PetscCallCXX(j_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(dj, di[5]));
102*c0c276a7Ssdargavi     PetscCallCXX(a_nonlocal_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(oa, oi[5]));
103*c0c276a7Ssdargavi     PetscCallCXX(i_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oi, m + 1));
104*c0c276a7Ssdargavi     PetscCallCXX(j_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oj, oi[5]));
105*c0c276a7Ssdargavi 
106*c0c276a7Ssdargavi     // Haven't specified an exec space so these will all be synchronous
107*c0c276a7Ssdargavi     // and finish without a need to call fence after
108*c0c276a7Ssdargavi     PetscCallCXX(Kokkos::deep_copy(a_local_d, a_local_h));
109*c0c276a7Ssdargavi     PetscCallCXX(Kokkos::deep_copy(i_local_d, i_local_h));
110*c0c276a7Ssdargavi     PetscCallCXX(Kokkos::deep_copy(j_local_d, j_local_h));
111*c0c276a7Ssdargavi     PetscCallCXX(Kokkos::deep_copy(a_nonlocal_d, a_nonlocal_h));
112*c0c276a7Ssdargavi     PetscCallCXX(Kokkos::deep_copy(i_nonlocal_d, i_nonlocal_h));
113*c0c276a7Ssdargavi     PetscCallCXX(Kokkos::deep_copy(j_nonlocal_d, j_nonlocal_h));
114*c0c276a7Ssdargavi 
115*c0c276a7Ssdargavi     // The garray passed in has to be on the host, but it can be created
116*c0c276a7Ssdargavi     // on device and copied to the host
117*c0c276a7Ssdargavi     // We're just going to copy the existing host values here
118*c0c276a7Ssdargavi     PetscCall(PetscMalloc1(n_ab, &garray_h));
119*c0c276a7Ssdargavi     for (int i = 0; i < n_ab; i++) { garray_h[i] = garray[i]; }
120*c0c276a7Ssdargavi 
121*c0c276a7Ssdargavi     // ~~~~~~~~~~~~~~~~~~~~~
122*c0c276a7Ssdargavi 
123*c0c276a7Ssdargavi     // ~~~~~~~~~~~~~~~~~
124*c0c276a7Ssdargavi     // Test MatCreateSeqAIJKokkosWithKokkosViews
125*c0c276a7Ssdargavi     // ~~~~~~~~~~~~~~~~~
126*c0c276a7Ssdargavi 
127*c0c276a7Ssdargavi     // We can create our local diagonal block matrix directly on the device
128*c0c276a7Ssdargavi     PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n, i_local_d, j_local_d, a_local_d, &output_mat_local));
129*c0c276a7Ssdargavi 
130*c0c276a7Ssdargavi     // We can create our nonlocal diagonal block matrix directly on the device
131*c0c276a7Ssdargavi     PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n_ab, i_nonlocal_d, j_nonlocal_d, a_nonlocal_d, &output_mat_nonlocal));
132*c0c276a7Ssdargavi 
133*c0c276a7Ssdargavi     // Build our MPI matrix
134*c0c276a7Ssdargavi     // If we provide garray and output_mat_nonlocal with local indices and the compactified size
135*c0c276a7Ssdargavi     // almost nothing happens on the host
136*c0c276a7Ssdargavi     PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, M, N, output_mat_local, output_mat_nonlocal, garray_h, &B));
137*c0c276a7Ssdargavi 
138*c0c276a7Ssdargavi     PetscCall(MatEqual(A, B, &equal));
139*c0c276a7Ssdargavi     PetscCall(MatRestoreRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done));
140*c0c276a7Ssdargavi     PetscCall(MatSeqAIJRestoreArray(AA, &da));
141*c0c276a7Ssdargavi     PetscCall(MatRestoreRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done));
142*c0c276a7Ssdargavi     PetscCall(MatSeqAIJRestoreArray(AB, &oa));
143*c0c276a7Ssdargavi 
144*c0c276a7Ssdargavi     PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely a bug in MatCreateSeqAIJKokkosWithKokkosViews()");
145*c0c276a7Ssdargavi     PetscCall(MatDestroy(&B));
146*c0c276a7Ssdargavi 
147*c0c276a7Ssdargavi     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
148*c0c276a7Ssdargavi 
149*c0c276a7Ssdargavi     /* Free spaces */
150*c0c276a7Ssdargavi     PetscCall(PetscRandomDestroy(&rctx));
151*c0c276a7Ssdargavi     PetscCall(MatDestroy(&A));
152*c0c276a7Ssdargavi   }
153*c0c276a7Ssdargavi   PetscCall(PetscFinalize());
154*c0c276a7Ssdargavi   return 0;
155*c0c276a7Ssdargavi }
156*c0c276a7Ssdargavi 
157*c0c276a7Ssdargavi /*TEST
158*c0c276a7Ssdargavi   build:
159*c0c276a7Ssdargavi     requires: kokkos_kernels
160*c0c276a7Ssdargavi 
161*c0c276a7Ssdargavi   test:
162*c0c276a7Ssdargavi     nsize: 2
163*c0c276a7Ssdargavi     args: -mat_type aijkokkos
164*c0c276a7Ssdargavi     requires: kokkos_kernels
165*c0c276a7Ssdargavi     output_file: output/empty.out
166*c0c276a7Ssdargavi 
167*c0c276a7Ssdargavi TEST*/
168