Lines Matching refs:Kokkos
100 /* the a->diag is created at assembly here because the rest of the Kokkos AIJ code assumes it always exists. This needs to be fixed since it is now only created when needed! */
103 auto diag_d = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), diag_h);
277 PetscCallCXX(Kokkos::deep_copy(Ti_h, 0));
301 auto Ti_d = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), Ti_h);
302 auto Tj_d = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), Tj_h);
304 PetscCallCXX(perm_d = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), perm_h));
332 PetscCallCXX(Kokkos::parallel_for(Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, nz), KOKKOS_LAMBDA(const PetscInt i) { Ta(i) = Aa(perm(i)); }));
339 PetscCallCXX(Kokkos::parallel_for(Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, nz), KOKKOS_LAMBDA(const PetscInt i) { T.values(i) = Aa(perm(i)); }));
370 PetscCallCXX(Kokkos::parallel_for(Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, nz), KOKKOS_LAMBDA(const PetscInt i) { Ta(i) = PetscConj(Aa(perm(i))); }));
377 PetscCallCXX(Kokkos::parallel_for(Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, nz), KOKKOS_LAMBDA(const PetscInt i) { T.values(i) = PetscConj(Aa(perm(i))); }));
616 Kokkos::deep_copy(bkok->a_dual.view_host(), akok->a_dual.view_host());
619 Kokkos::deep_copy(bkok->a_dual.view_device(), akok->a_dual.view_device());
654 PetscCallCXX(Kokkos::deep_copy(bkok->a_dual.view_device(), internT.values));
660 PetscCallCXX(Kokkos::deep_copy(a_h, internT.values));
661 PetscCallCXX(Kokkos::deep_copy(j_h, internT.graph.entries));
690 MATSEQAIJKOKKOS - MATAIJKOKKOS = "(seq)aijkokkos" - A matrix type to be used for sparse matrices with Kokkos
692 A matrix type using Kokkos-Kernels CrsMatrix type for portability across different device types
750 /* Concatenate A and B in parallel using Kokkos hierarchical parallelism */
751 Kokkos::parallel_for(
752 Kokkos::TeamPolicy<>(PetscGetKokkosExecutionSpace(), m, Kokkos::AUTO()), KOKKOS_LAMBDA(const KokkosTeamMemberType &t) {
756 Kokkos::single(Kokkos::PerTeam(t), [=]() { /* this side effect only happens once per whole team */
761 Kokkos::parallel_for(Kokkos::TeamThreadRange(t, alen + blen), [&](PetscInt k) {
780 Kokkos::parallel_for(
781 Kokkos::TeamPolicy<>(PetscGetKokkosExecutionSpace(), m, Kokkos::AUTO()), KOKKOS_LAMBDA(const KokkosTeamMemberType &t) {
784 Kokkos::parallel_for(Kokkos::TeamThreadRange(t, alen + blen), [&](PetscInt k) {
1048 PetscCallCXX(Kokkos::parallel_for(Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, n), KOKKOS_LAMBDA(const PetscInt i) { Aa(Adiag(i)) += a; }));
1077 PetscCallCXX(Kokkos::parallel_for(
1078 Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, n), KOKKOS_LAMBDA(const PetscInt i) {
1109 PetscCallCXX(Kokkos::parallel_for( // for each row
1110 Kokkos::TeamPolicy<>(PetscGetKokkosExecutionSpace(), m, Kokkos::AUTO()), KOKKOS_LAMBDA(const KokkosTeamMemberType &t) {
1114 Kokkos::parallel_for(Kokkos::TeamThreadRange(t, len), [&](PetscInt j) { Aa(Ai(i) + j) *= lv(i); });
1123 PetscCallCXX(Kokkos::parallel_for( // for each nonzero
1124 Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, nz), KOKKOS_LAMBDA(const PetscInt k) { Aa(k) *= rv(Aj(k)); }));
1167 Kokkos::parallel_for(
1168 Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, n), KOKKOS_LAMBDA(const PetscInt i) {
1176 /* Get a Kokkos View from a mat of type MatSeqAIJKokkos */
1247 PetscErrorCode MatCreateSeqAIJKokkosWithKokkosViews(MPI_Comm comm, PetscInt m, PetscInt n, Kokkos::View<PetscInt *> &i_d, Kokkos::View<PetscInt *> &j_d, Kokkos::View<PetscScalar *> &a_d, Mat *A)
1300 Kokkos::parallel_for(
1301 Kokkos::TeamPolicy<>(exec, Y->rmap->n, 1), KOKKOS_LAMBDA(const KokkosTeamMemberType &t) {
1303 Kokkos::single(Kokkos::PerTeam(t), [=]() {
1317 if (Yi(i) != Yi(i + 1)) Ya(Yi(i)) = Kokkos::ArithTraits<PetscScalar>::nan();
1319 if (Yi(i) != Yi(i + 1)) Ya(Yi(i)) = Kokkos::Experimental::nan("1");
1355 jmap = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), PetscCountKokkosViewHost(coo_h->jmap, nz + 1));
1356 perm = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), PetscCountKokkosViewHost(coo_h->perm, Atot));
1412 kv = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), ConstMatScalarKokkosViewHost(v, n));
1421 Kokkos::parallel_for(
1422 Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, Annz), KOKKOS_LAMBDA(const PetscCount i) {
1507 auto ts = Kokkos::AUTO();
1509 auto ts = 16; // improved performance 30% over Kokkos::AUTO() with CUDA, but failed with "Kokkos::abort: Requested Team Size is too large!" on CPUs
1511 PetscCallCXX(Kokkos::parallel_for(
1512 Kokkos::TeamPolicy<>(PetscGetKokkosExecutionSpace(), nblocks, ts), KOKKOS_LAMBDA(const KokkosTeamMemberType &teamMember) {
1516 const auto &B = Kokkos::View<PetscScalar **, Kokkos::LayoutLeft>(&diagVal(bs2(bid)), m, m); // column-major order
1519 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, m), [=](const PetscInt &r) { // r-th row in B
1629 Kokkos for calculations.
1722 PetscCallCXX(Kokkos::deep_copy(factors->iUt_d, factors->iUt_h));
1723 PetscCallCXX(Kokkos::deep_copy(factors->jUt_d, factors->jUt_h));
1724 PetscCallCXX(Kokkos::deep_copy(factors->aUt_d, factors->aUt_h));
1759 PetscCallCXX(Kokkos::deep_copy(factors->iLt_d, factors->iLt_h));
1760 PetscCallCXX(Kokkos::deep_copy(factors->jLt_d, factors->jLt_h));
1761 PetscCallCXX(Kokkos::deep_copy(factors->aLt_d, factors->aLt_h));
1805 PetscCallCXX(Kokkos::parallel_for(Kokkos::RangePolicy<>(exec, 0, m), KOKKOS_LAMBDA(const PetscInt i) { B(i) = b(rowperm(i)); }));
1812 PetscCallCXX(Kokkos::parallel_for(Kokkos::RangePolicy<>(exec, 0, m), KOKKOS_LAMBDA(const PetscInt i) { Y(i) = Y(i) * D(i); }));
1823 if (!identity) PetscCallCXX(Kokkos::parallel_for(Kokkos::RangePolicy<>(exec, 0, m), KOKKOS_LAMBDA(const PetscInt i) { x(rowperm(i)) = X(i); }));
1859 PetscCallCXX(Kokkos::parallel_for(Kokkos::RangePolicy<>(exec, 0, m), KOKKOS_LAMBDA(const PetscInt i) { B(i) = b(rowperm(i)); }));
1873 if (!col_identity) PetscCallCXX(Kokkos::parallel_for(Kokkos::RangePolicy<>(exec, 0, m), KOKKOS_LAMBDA(const PetscInt i) { x(colperm(i)) = X(i); }));
1910 PetscCallCXX(Kokkos::parallel_for(Kokkos::RangePolicy<>(exec, 0, m), KOKKOS_LAMBDA(const PetscInt i) { B(i) = b(colperm(i)); }));
1924 if (!row_identity) PetscCallCXX(Kokkos::parallel_for(Kokkos::RangePolicy<>(exec, 0, m), KOKKOS_LAMBDA(const PetscInt i) { x(rowperm(i)) = X(i); }));
1974 factors->iL_d = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), factors->iL_h);
1975 factors->jL_d = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), factors->jL_h);
1976 factors->iU_d = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), factors->iU_h);
1977 factors->jU_d = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), factors->jU_h);
1978 factors->aL_d = Kokkos::create_mirror_view(DefaultMemorySpace(), factors->aL_h);
1979 factors->aU_d = Kokkos::create_mirror_view(DefaultMemorySpace(), factors->aU_h);
1990 PetscCallCXX(Kokkos::deep_copy(factors->rowperm, PetscIntKokkosViewHost(const_cast<PetscInt *>(ip), m)));
2003 PetscCallCXX(Kokkos::deep_copy(factors->colperm, PetscIntKokkosViewHost(const_cast<PetscInt *>(ip), n)));
2037 PetscCallCXX(Kokkos::deep_copy(factors->aL_d, factors->aL_h));
2038 PetscCallCXX(Kokkos::deep_copy(factors->aU_d, factors->aU_h));
2104 Kokkos::realloc(factors->iL_d, n + 1); /* Free old arrays and realloc */
2105 Kokkos::realloc(factors->jL_d, spiluk_handle->get_nnzL());
2106 Kokkos::realloc(factors->iU_d, n + 1);
2107 Kokkos::realloc(factors->jU_d, spiluk_handle->get_nnzU());
2115 Kokkos::resize(factors->jL_d, spiluk_handle->get_nnzL()); /* Shrink or expand, and retain old value */
2116 Kokkos::resize(factors->jU_d, spiluk_handle->get_nnzU());
2117 Kokkos::realloc(factors->aL_d, spiluk_handle->get_nnzL()); /* No need to retain old value */
2118 Kokkos::realloc(factors->aU_d, spiluk_handle->get_nnzU());
2195 factors->aU_d = Kokkos::create_mirror_view(DefaultMemorySpace(), factors->aU_h);
2196 factors->D_d = Kokkos::create_mirror_view(DefaultMemorySpace(), factors->D_h);
2207 PetscCallCXX(factors->iU_d = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), factors->iU_h));
2208 PetscCallCXX(factors->jU_d = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), factors->jU_h));
2219 PetscCallCXX(Kokkos::deep_copy(factors->rowperm, PetscIntKokkosViewHost(const_cast<PetscInt *>(ip), m)));
2245 PetscCallCXX(Kokkos::deep_copy(factors->aU_d, factors->aU_h));
2246 PetscCallCXX(Kokkos::deep_copy(factors->D_d, factors->D_h));
2297 // The _Kokkos suffix means we will use Kokkos as a solver for the SeqAIJKokkos matrix
2306 MATSOLVERKOKKOS = "Kokkos" - A matrix solver type providing triangular solvers for sequential matrices
2360 const auto &iv = Kokkos::create_mirror_view_and_copy(HostMirrorMemorySpace(), csrmat.graph.row_map);
2361 const auto &jv = Kokkos::create_mirror_view_and_copy(HostMirrorMemorySpace(), csrmat.graph.entries);
2362 const auto &av = Kokkos::create_mirror_view_and_copy(HostMirrorMemorySpace(), csrmat.values);