1*9e201c85SYohann // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2*9e201c85SYohann // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3*9e201c85SYohann // 4*9e201c85SYohann // SPDX-License-Identifier: BSD-2-Clause 5*9e201c85SYohann // 6*9e201c85SYohann // This file is part of CEED: http://github.com/ceed 7*9e201c85SYohann 8*9e201c85SYohann /// @file 9*9e201c85SYohann /// Internal header for CUDA shared memory basis read/write templates 10*9e201c85SYohann #ifndef _ceed_cuda_shared_basis_read_write_templates_h 11*9e201c85SYohann #define _ceed_cuda_shared_basis_read_write_templates_h 12*9e201c85SYohann 13*9e201c85SYohann #include <ceed.h> 14*9e201c85SYohann 15*9e201c85SYohann //------------------------------------------------------------------------------ 16*9e201c85SYohann // 1D 17*9e201c85SYohann //------------------------------------------------------------------------------ 18*9e201c85SYohann 19*9e201c85SYohann //------------------------------------------------------------------------------ 20*9e201c85SYohann // E-vector -> single element 21*9e201c85SYohann //------------------------------------------------------------------------------ 22*9e201c85SYohann template <int NUM_COMP, int P_1D> 23*9e201c85SYohann inline __device__ void ReadElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 24*9e201c85SYohann if (data.t_id_x < P_1D) { 25*9e201c85SYohann const CeedInt node = data.t_id_x; 26*9e201c85SYohann const CeedInt ind = node * strides_node + elem * strides_elem; 27*9e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 28*9e201c85SYohann r_u[comp] = d_u[ind + comp * strides_comp]; 29*9e201c85SYohann } 30*9e201c85SYohann } 31*9e201c85SYohann } 32*9e201c85SYohann 33*9e201c85SYohann //------------------------------------------------------------------------------ 34*9e201c85SYohann // Single element -> E-vector 35*9e201c85SYohann //------------------------------------------------------------------------------ 36*9e201c85SYohann template <int NUM_COMP, int P_1D> 37*9e201c85SYohann inline __device__ void WriteElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 38*9e201c85SYohann if (data.t_id_x < P_1D) { 39*9e201c85SYohann const CeedInt node = data.t_id_x; 40*9e201c85SYohann const CeedInt ind = node * strides_node + elem * strides_elem; 41*9e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 42*9e201c85SYohann d_v[ind + comp * strides_comp] = r_v[comp]; 43*9e201c85SYohann } 44*9e201c85SYohann } 45*9e201c85SYohann } 46*9e201c85SYohann 47*9e201c85SYohann //------------------------------------------------------------------------------ 48*9e201c85SYohann // 2D 49*9e201c85SYohann //------------------------------------------------------------------------------ 50*9e201c85SYohann 51*9e201c85SYohann //------------------------------------------------------------------------------ 52*9e201c85SYohann // E-vector -> single element 53*9e201c85SYohann //------------------------------------------------------------------------------ 54*9e201c85SYohann template <int NUM_COMP, int P_1D> 55*9e201c85SYohann inline __device__ void ReadElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 56*9e201c85SYohann if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 57*9e201c85SYohann const CeedInt node = data.t_id_x + data.t_id_y*P_1D; 58*9e201c85SYohann const CeedInt ind = node * strides_node + elem * strides_elem; 59*9e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 60*9e201c85SYohann r_u[comp] = d_u[ind + comp * strides_comp]; 61*9e201c85SYohann } 62*9e201c85SYohann } 63*9e201c85SYohann } 64*9e201c85SYohann 65*9e201c85SYohann //------------------------------------------------------------------------------ 66*9e201c85SYohann // Single element -> E-vector 67*9e201c85SYohann //------------------------------------------------------------------------------ 68*9e201c85SYohann template <int NUM_COMP, int P_1D> 69*9e201c85SYohann inline __device__ void WriteElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 70*9e201c85SYohann if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 71*9e201c85SYohann const CeedInt node = data.t_id_x + data.t_id_y*P_1D; 72*9e201c85SYohann const CeedInt ind = node * strides_node + elem * strides_elem; 73*9e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 74*9e201c85SYohann d_v[ind + comp * strides_comp] = r_v[comp]; 75*9e201c85SYohann } 76*9e201c85SYohann } 77*9e201c85SYohann } 78*9e201c85SYohann 79*9e201c85SYohann //------------------------------------------------------------------------------ 80*9e201c85SYohann // 3D 81*9e201c85SYohann //------------------------------------------------------------------------------ 82*9e201c85SYohann 83*9e201c85SYohann //------------------------------------------------------------------------------ 84*9e201c85SYohann // E-vector -> single element 85*9e201c85SYohann //------------------------------------------------------------------------------ 86*9e201c85SYohann template <int NUM_COMP, int P_1D> 87*9e201c85SYohann inline __device__ void ReadElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 88*9e201c85SYohann if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 89*9e201c85SYohann for (CeedInt z = 0; z < P_1D; z++) { 90*9e201c85SYohann const CeedInt node = data.t_id_x + data.t_id_y*P_1D + z*P_1D*P_1D; 91*9e201c85SYohann const CeedInt ind = node * strides_node + elem * strides_elem; 92*9e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 93*9e201c85SYohann r_u[z + comp * P_1D] = d_u[ind + comp * strides_comp]; 94*9e201c85SYohann } 95*9e201c85SYohann } 96*9e201c85SYohann } 97*9e201c85SYohann } 98*9e201c85SYohann 99*9e201c85SYohann //------------------------------------------------------------------------------ 100*9e201c85SYohann // Single element -> E-vector 101*9e201c85SYohann //------------------------------------------------------------------------------ 102*9e201c85SYohann template <int NUM_COMP, int P_1D> 103*9e201c85SYohann inline __device__ void WriteElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 104*9e201c85SYohann if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 105*9e201c85SYohann for (CeedInt z = 0; z < P_1D; z++) { 106*9e201c85SYohann const CeedInt node = data.t_id_x + data.t_id_y*P_1D + z*P_1D*P_1D; 107*9e201c85SYohann const CeedInt ind = node * strides_node + elem * strides_elem; 108*9e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 109*9e201c85SYohann d_v[ind + comp * strides_comp] = r_v[z + comp * P_1D]; 110*9e201c85SYohann } 111*9e201c85SYohann } 112*9e201c85SYohann } 113*9e201c85SYohann } 114*9e201c85SYohann 115*9e201c85SYohann //------------------------------------------------------------------------------ 116*9e201c85SYohann 117*9e201c85SYohann #endif 118