xref: /libCEED/include/ceed/jit-source/hip/hip-shared-basis-nontensor-templates.h (revision 6c13bbcb075a8bd926202fb66e317904cf28c80e)
1*6c13bbcbSJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
2*6c13bbcbSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3*6c13bbcbSJeremy L Thompson //
4*6c13bbcbSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
5*6c13bbcbSJeremy L Thompson //
6*6c13bbcbSJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
7*6c13bbcbSJeremy L Thompson 
8*6c13bbcbSJeremy L Thompson /// @file
9*6c13bbcbSJeremy L Thompson /// Internal header for HIP shared memory non-tensor basis templates
10*6c13bbcbSJeremy L Thompson #include <ceed/types.h>
11*6c13bbcbSJeremy L Thompson 
12*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
13*6c13bbcbSJeremy L Thompson // 1D tensor contraction
14*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
15*6c13bbcbSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D>
16*6c13bbcbSJeremy L Thompson inline __device__ void Contract1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
17*6c13bbcbSJeremy L Thompson   data.slice[data.t_id_x] = *U;
18*6c13bbcbSJeremy L Thompson   __syncthreads();
19*6c13bbcbSJeremy L Thompson   *V = 0.0;
20*6c13bbcbSJeremy L Thompson   if (data.t_id_x < Q_1D) {
21*6c13bbcbSJeremy L Thompson     for (CeedInt i = 0; i < P_1D; i++) {
22*6c13bbcbSJeremy L Thompson       *V += B[i + data.t_id_x * P_1D] * data.slice[i];  // Contract x direction
23*6c13bbcbSJeremy L Thompson     }
24*6c13bbcbSJeremy L Thompson   }
25*6c13bbcbSJeremy L Thompson   __syncthreads();
26*6c13bbcbSJeremy L Thompson }
27*6c13bbcbSJeremy L Thompson 
28*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
29*6c13bbcbSJeremy L Thompson // 1D transpose tensor contraction
30*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
31*6c13bbcbSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D>
32*6c13bbcbSJeremy L Thompson inline __device__ void ContractTranspose1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
33*6c13bbcbSJeremy L Thompson   data.slice[data.t_id_x] = *U;
34*6c13bbcbSJeremy L Thompson   __syncthreads();
35*6c13bbcbSJeremy L Thompson   if (data.t_id_x < P_1D) {
36*6c13bbcbSJeremy L Thompson     for (CeedInt i = 0; i < Q_1D; i++) {
37*6c13bbcbSJeremy L Thompson       *V += B[data.t_id_x + i * P_1D] * data.slice[i];  // Contract x direction
38*6c13bbcbSJeremy L Thompson     }
39*6c13bbcbSJeremy L Thompson   }
40*6c13bbcbSJeremy L Thompson   __syncthreads();
41*6c13bbcbSJeremy L Thompson }
42*6c13bbcbSJeremy L Thompson 
43*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
44*6c13bbcbSJeremy L Thompson // Interpolate to quadrature points
45*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
46*6c13bbcbSJeremy L Thompson template <int NUM_COMP, int P, int Q>
47*6c13bbcbSJeremy L Thompson inline __device__ void Interp1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, CeedScalar *__restrict__ r_V) {
48*6c13bbcbSJeremy L Thompson   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
49*6c13bbcbSJeremy L Thompson     Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
50*6c13bbcbSJeremy L Thompson   }
51*6c13bbcbSJeremy L Thompson }
52*6c13bbcbSJeremy L Thompson 
53*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
54*6c13bbcbSJeremy L Thompson // Interpolate transpose
55*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
56*6c13bbcbSJeremy L Thompson template <int NUM_COMP, int P, int Q>
57*6c13bbcbSJeremy L Thompson inline __device__ void InterpTranspose1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
58*6c13bbcbSJeremy L Thompson                                          CeedScalar *__restrict__ r_V) {
59*6c13bbcbSJeremy L Thompson   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
60*6c13bbcbSJeremy L Thompson     r_V[comp] = 0.0;
61*6c13bbcbSJeremy L Thompson     ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
62*6c13bbcbSJeremy L Thompson   }
63*6c13bbcbSJeremy L Thompson }
64*6c13bbcbSJeremy L Thompson 
65*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
66*6c13bbcbSJeremy L Thompson // Derivatives at quadrature points
67*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
68*6c13bbcbSJeremy L Thompson template <int NUM_COMP, int P, int Q>
69*6c13bbcbSJeremy L Thompson inline __device__ void Grad1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
70*6c13bbcbSJeremy L Thompson                               CeedScalar *__restrict__ r_V) {
71*6c13bbcbSJeremy L Thompson   for (CeedInt dim = 0; dim < DIM; dim++) {
72*6c13bbcbSJeremy L Thompson     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
73*6c13bbcbSJeremy L Thompson       Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]);
74*6c13bbcbSJeremy L Thompson     }
75*6c13bbcbSJeremy L Thompson   }
76*6c13bbcbSJeremy L Thompson }
77*6c13bbcbSJeremy L Thompson 
78*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
79*6c13bbcbSJeremy L Thompson // Derivatives transpose
80*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
81*6c13bbcbSJeremy L Thompson template <int NUM_COMP, int P, int Q>
82*6c13bbcbSJeremy L Thompson inline __device__ void GradTranspose1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
83*6c13bbcbSJeremy L Thompson                                        CeedScalar *__restrict__ r_V) {
84*6c13bbcbSJeremy L Thompson   for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0;
85*6c13bbcbSJeremy L Thompson   for (CeedInt dim = 0; dim < DIM; dim++) {
86*6c13bbcbSJeremy L Thompson     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
87*6c13bbcbSJeremy L Thompson       ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[comp]);
88*6c13bbcbSJeremy L Thompson     }
89*6c13bbcbSJeremy L Thompson   }
90*6c13bbcbSJeremy L Thompson }
91*6c13bbcbSJeremy L Thompson 
92*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
93*6c13bbcbSJeremy L Thompson // Quadrature weights
94*6c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
95*6c13bbcbSJeremy L Thompson template <int Q>
96*6c13bbcbSJeremy L Thompson inline __device__ void Weight1d(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight_1d, CeedScalar *w) {
97*6c13bbcbSJeremy L Thompson   *w = (data.t_id_x < Q) ? q_weight_1d[data.t_id_x] : 0.0;
98*6c13bbcbSJeremy L Thompson }
99