Lines Matching refs:Nb
137 PetscErrorCode LandauKokkosStaticDataSet(DM plex, const PetscInt Nq, const PetscInt Nb, const PetscInt batch_sz, const PetscInt num_grids, PetscInt a_numCells[], PetscInt a_species_offset[], PetscInt a_mat_offset[], PetscReal a_nu_alpha[], PetscReal a_nu_beta[], PetscReal a_invMass[], PetscReal a_lambdas[], PetscReal a_invJ[], PetscReal a_x[], PetscReal a_y[], PetscReal a_z[], PetscReal a_w[], LandauStaticData *SData_d)
178 const Kokkos::View<PetscReal *, Kokkos::LayoutLeft, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> h_BB(BB, Nq * Nb);
179 auto B = new Kokkos::View<PetscReal *, Kokkos::LayoutLeft>("B", Nq * Nb);
181 const Kokkos::View<PetscReal *, Kokkos::LayoutLeft, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> h_DD(DD, Nq * Nb * dim);
182 auto D = new Kokkos::View<PetscReal *, Kokkos::LayoutLeft>("D", Nq * Nb * dim);
343 PetscErrorCode landau_mat_assemble(PetscScalar *coo_vals, const PetscScalar Aij, const PetscInt f, const PetscInt g, const PetscInt Nb, PetscInt moffset, const PetscInt elem, const PetscInt fieldA, const P4estVertexMaps *d_maps, const LandauIdx coo_elem_offsets[], const LandauIdx coo_elem_fullNb[], const LandauIdx (*coo_elem_point_offsets)[LANDAU_MAX_NQND + 1], const PetscInt glb_elem_idx, const PetscInt bid_coo_sz_batch)
381 PetscErrorCode LandauKokkosJacobian(DM plex[], const PetscInt Nq, const PetscInt Nb, const PetscInt batch_sz, const PetscInt num_grids, const PetscInt a_numCells[], PetscReal a_Eq_m[], PetscScalar a_elem_closure[], const PetscScalar a_xarray[], const LandauStaticData *SData_d, const PetscReal shift, const PetscLogEvent events[], const PetscInt a_mat_offset[], const PetscInt a_species_offset[], Mat subJ[], Mat JacP)
480 const int totDim_max = Nf_max * Nb, elem_mat_size_max = totDim_max * totDim_max;
497 closure_sz += Nb * nfloc * a_numCells[grid];
511 const int scr_bytes_fdf = real2_scr_t::shmem_size(Nf_max, Nb);
523 real2_scr_t s_coef_k(team.team_scratch(KOKKOS_SHARED_LEVEL), Nf_max, Nb);
528 coef = &d_vertex_f[b_id * IPf_sz_glb + d_ipf_offset[grid] + loc_elem * Nb * loc_Nf]; // closure and IP indexing are the same
533 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, 0, (int)Nb), [=](int b) {
534 //for (int b = 0; b < Nb; ++b) {
537 coef[f * Nb + b] = d_vertex_f[idx + moffset]; // xarray data, not IP, need raw array to deal with CPU assemble case (not used)
540 coef[f * Nb + b] = 0;
544 if (id >= 0) coef[f * Nb + b] += scale * d_vertex_f[id + moffset];
553 const PetscReal *Bq = &d_BB[myQi * Nb], *Dq = &d_DD[myQi * Nb * dim];
560 for (b = 0; b < Nb; ++b) {
561 d_fdf_k(b_id, 0, idx) += Bq[b] * PetscRealPart(coef[f * Nb + b]);
562 for (d = 0; d < dim; ++d) refSpaceDer[d] += Dq[b * dim + d] * PetscRealPart(coef[f * Nb + b]);
596 const PetscInt totDim = loc_Nf * Nb;
696 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0, Nb), [=](int f) {
697 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, 0, Nb), [=](int g) {
700 const PetscReal *BJq = &d_BB[qj * Nb], *DIq = &d_DD[qj * Nb * dim];
708 const PetscInt fOff = (fieldA * Nb + f) * totDim + fieldA * Nb + g;
712 static_cast<void>(landau_mat_assemble(d_coo_vals, t, f, g, Nb, moffset, loc_elem, fieldA, maps[grid], d_coo_elem_offsets, d_coo_elem_fullNb, d_coo_elem_point_offsets, b_elem_idx, b_id * coo_sz_batch));
736 PetscCall(PetscInfo(plex[0], "Mass team size=%d vector size=%d #face=%d Nb=%" PetscInt_FMT ", %s assembly\n", team_size, vector_size, nfaces, Nb, d_coo_vals ? "COO" : "CSR"));
746 const PetscInt totDim = loc_Nf * Nb;
751 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0, Nb), [=](int f) {
752 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, 0, Nb), [=](int g) {
755 const PetscReal *BJq = &d_BB[qj * Nb];
765 const PetscInt fOff = (fieldA * Nb + f) * totDim + fieldA * Nb + g;
769 static_cast<void>(landau_mat_assemble(d_coo_vals, t, f, g, Nb, moffset, loc_elem, fieldA, maps[grid], d_coo_elem_offsets, d_coo_elem_fullNb, d_coo_elem_point_offsets, b_elem_idx, b_id * coo_sz_batch));
808 const PetscInt loc_Nf = a_species_offset[grid + 1] - a_species_offset[grid], totDim = loc_Nf * Nb;