1*6c13bbcbSJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 2*6c13bbcbSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3*6c13bbcbSJeremy L Thompson // 4*6c13bbcbSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 5*6c13bbcbSJeremy L Thompson // 6*6c13bbcbSJeremy L Thompson // This file is part of CEED: http://github.com/ceed 7*6c13bbcbSJeremy L Thompson 8*6c13bbcbSJeremy L Thompson /// @file 9*6c13bbcbSJeremy L Thompson /// Internal header for HIP shared memory non-tensor basis templates 10*6c13bbcbSJeremy L Thompson #include <ceed/types.h> 11*6c13bbcbSJeremy L Thompson 12*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 13*6c13bbcbSJeremy L Thompson // 1D tensor contraction 14*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 15*6c13bbcbSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D> 16*6c13bbcbSJeremy L Thompson inline __device__ void Contract1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 17*6c13bbcbSJeremy L Thompson data.slice[data.t_id_x] = *U; 18*6c13bbcbSJeremy L Thompson __syncthreads(); 19*6c13bbcbSJeremy L Thompson *V = 0.0; 20*6c13bbcbSJeremy L Thompson if (data.t_id_x < Q_1D) { 21*6c13bbcbSJeremy L Thompson for (CeedInt i = 0; i < P_1D; i++) { 22*6c13bbcbSJeremy L Thompson *V += B[i + data.t_id_x * P_1D] * data.slice[i]; // Contract x direction 23*6c13bbcbSJeremy L Thompson } 24*6c13bbcbSJeremy L Thompson } 25*6c13bbcbSJeremy L Thompson __syncthreads(); 26*6c13bbcbSJeremy L Thompson } 27*6c13bbcbSJeremy L Thompson 28*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 29*6c13bbcbSJeremy L Thompson // 1D transpose tensor contraction 30*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 31*6c13bbcbSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D> 32*6c13bbcbSJeremy L Thompson inline __device__ void ContractTranspose1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 33*6c13bbcbSJeremy L Thompson data.slice[data.t_id_x] = *U; 34*6c13bbcbSJeremy L Thompson __syncthreads(); 35*6c13bbcbSJeremy L Thompson if (data.t_id_x < P_1D) { 36*6c13bbcbSJeremy L Thompson for (CeedInt i = 0; i < Q_1D; i++) { 37*6c13bbcbSJeremy L Thompson *V += B[data.t_id_x + i * P_1D] * data.slice[i]; // Contract x direction 38*6c13bbcbSJeremy L Thompson } 39*6c13bbcbSJeremy L Thompson } 40*6c13bbcbSJeremy L Thompson __syncthreads(); 41*6c13bbcbSJeremy L Thompson } 42*6c13bbcbSJeremy L Thompson 43*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 44*6c13bbcbSJeremy L Thompson // Interpolate to quadrature points 45*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 46*6c13bbcbSJeremy L Thompson template <int NUM_COMP, int P, int Q> 47*6c13bbcbSJeremy L Thompson inline __device__ void Interp1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, CeedScalar *__restrict__ r_V) { 48*6c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 49*6c13bbcbSJeremy L Thompson Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]); 50*6c13bbcbSJeremy L Thompson } 51*6c13bbcbSJeremy L Thompson } 52*6c13bbcbSJeremy L Thompson 53*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 54*6c13bbcbSJeremy L Thompson // Interpolate transpose 55*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 56*6c13bbcbSJeremy L Thompson template <int NUM_COMP, int P, int Q> 57*6c13bbcbSJeremy L Thompson inline __device__ void InterpTranspose1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 58*6c13bbcbSJeremy L Thompson CeedScalar *__restrict__ r_V) { 59*6c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 60*6c13bbcbSJeremy L Thompson r_V[comp] = 0.0; 61*6c13bbcbSJeremy L Thompson ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]); 62*6c13bbcbSJeremy L Thompson } 63*6c13bbcbSJeremy L Thompson } 64*6c13bbcbSJeremy L Thompson 65*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 66*6c13bbcbSJeremy L Thompson // Derivatives at quadrature points 67*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 68*6c13bbcbSJeremy L Thompson template <int NUM_COMP, int P, int Q> 69*6c13bbcbSJeremy L Thompson inline __device__ void Grad1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, 70*6c13bbcbSJeremy L Thompson CeedScalar *__restrict__ r_V) { 71*6c13bbcbSJeremy L Thompson for (CeedInt dim = 0; dim < DIM; dim++) { 72*6c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 73*6c13bbcbSJeremy L Thompson Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]); 74*6c13bbcbSJeremy L Thompson } 75*6c13bbcbSJeremy L Thompson } 76*6c13bbcbSJeremy L Thompson } 77*6c13bbcbSJeremy L Thompson 78*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 79*6c13bbcbSJeremy L Thompson // Derivatives transpose 80*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 81*6c13bbcbSJeremy L Thompson template <int NUM_COMP, int P, int Q> 82*6c13bbcbSJeremy L Thompson inline __device__ void GradTranspose1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, 83*6c13bbcbSJeremy L Thompson CeedScalar *__restrict__ r_V) { 84*6c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0; 85*6c13bbcbSJeremy L Thompson for (CeedInt dim = 0; dim < DIM; dim++) { 86*6c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 87*6c13bbcbSJeremy L Thompson ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[comp]); 88*6c13bbcbSJeremy L Thompson } 89*6c13bbcbSJeremy L Thompson } 90*6c13bbcbSJeremy L Thompson } 91*6c13bbcbSJeremy L Thompson 92*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 93*6c13bbcbSJeremy L Thompson // Quadrature weights 94*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 95*6c13bbcbSJeremy L Thompson template <int Q> 96*6c13bbcbSJeremy L Thompson inline __device__ void Weight1d(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight_1d, CeedScalar *w) { 97*6c13bbcbSJeremy L Thompson *w = (data.t_id_x < Q) ? q_weight_1d[data.t_id_x] : 0.0; 98*6c13bbcbSJeremy L Thompson } 99