xref: /libCEED/include/ceed/jit-source/sycl/sycl-shared-basis-tensor-templates.h (revision c0b5abf0f23b15c4f0ada76f8abe9f8d2b6fa247)
15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
2bd882c8aSJames Wright // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3bd882c8aSJames Wright //
4bd882c8aSJames Wright // SPDX-License-Identifier: BSD-2-Clause
5bd882c8aSJames Wright //
6bd882c8aSJames Wright // This file is part of CEED:  http://github.com/ceed
7bd882c8aSJames Wright 
8bd882c8aSJames Wright /// @file
9bd882c8aSJames Wright /// Internal header for SYCL shared memory tensor product basis templates
10bd882c8aSJames Wright 
11*c0b5abf0SJeremy L Thompson #include <ceed/types.h>
12bd882c8aSJames Wright 
13bd882c8aSJames Wright //------------------------------------------------------------------------------
14bd882c8aSJames Wright // 1D
15bd882c8aSJames Wright //------------------------------------------------------------------------------
16bd882c8aSJames Wright 
17bd882c8aSJames Wright //------------------------------------------------------------------------------
18bd882c8aSJames Wright // 1D tensor contraction x
19bd882c8aSJames Wright //------------------------------------------------------------------------------
20bd882c8aSJames Wright inline void ContractX1d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
21bd882c8aSJames Wright                         private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
22bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
23bd882c8aSJames Wright 
24bd882c8aSJames Wright   scratch[item_id_x] = *U;
25bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
26bd882c8aSJames Wright 
27bd882c8aSJames Wright   *V = 0.0;
28bd882c8aSJames Wright   if (item_id_x < Q_1D) {
29bd882c8aSJames Wright     for (CeedInt i = 0; i < P_1D; i++) {
30bd882c8aSJames Wright       *V += B[i + item_id_x * P_1D] * scratch[i];  // Contract x direction
31bd882c8aSJames Wright     }
32bd882c8aSJames Wright   }
33bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
34bd882c8aSJames Wright }
35bd882c8aSJames Wright 
36bd882c8aSJames Wright //------------------------------------------------------------------------------
37bd882c8aSJames Wright // 1D transpose tensor contraction x
38bd882c8aSJames Wright //------------------------------------------------------------------------------
39bd882c8aSJames Wright inline void ContractTransposeX1d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
40bd882c8aSJames Wright                                  private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
41bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
42bd882c8aSJames Wright 
43bd882c8aSJames Wright   scratch[item_id_x] = *U;
44bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
45bd882c8aSJames Wright 
46bd882c8aSJames Wright   *V = 0.0;
47bd882c8aSJames Wright   if (item_id_x < P_1D) {
48bd882c8aSJames Wright     for (CeedInt i = 0; i < Q_1D; i++) {
49bd882c8aSJames Wright       *V += B[item_id_x + i * P_1D] * scratch[i];  // Contract x direction
50bd882c8aSJames Wright     }
51bd882c8aSJames Wright   }
52bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
53bd882c8aSJames Wright }
54bd882c8aSJames Wright 
55bd882c8aSJames Wright //------------------------------------------------------------------------------
56bd882c8aSJames Wright // 1D interpolate to quadrature points
57bd882c8aSJames Wright //------------------------------------------------------------------------------
58bd882c8aSJames Wright inline void Interp1d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
59bd882c8aSJames Wright                      local const CeedScalar *restrict s_B, private CeedScalar *restrict r_V, local CeedScalar *restrict scratch) {
60bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
61bd882c8aSJames Wright     ContractX1d(P_1D, Q_1D, r_U + comp, s_B, r_V + comp, scratch);
62bd882c8aSJames Wright   }
63bd882c8aSJames Wright }
64bd882c8aSJames Wright 
65bd882c8aSJames Wright //------------------------------------------------------------------------------
66bd882c8aSJames Wright // 1D interpolate transpose
67bd882c8aSJames Wright //------------------------------------------------------------------------------
68bd882c8aSJames Wright inline void InterpTranspose1d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
69bd882c8aSJames Wright                               local const CeedScalar *restrict s_B, private CeedScalar *restrict r_V, local CeedScalar *restrict scratch) {
70bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
71bd882c8aSJames Wright     ContractTransposeX1d(P_1D, Q_1D, r_U + comp, s_B, r_V + comp, scratch);
72bd882c8aSJames Wright   }
73bd882c8aSJames Wright }
74bd882c8aSJames Wright 
75bd882c8aSJames Wright //------------------------------------------------------------------------------
76bd882c8aSJames Wright // 1D derivatives at quadrature points
77bd882c8aSJames Wright //------------------------------------------------------------------------------
78bd882c8aSJames Wright inline void Grad1d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
79bd882c8aSJames Wright                    local const CeedScalar *restrict s_G, private CeedScalar *restrict r_V, local CeedScalar *restrict scratch) {
80bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
81bd882c8aSJames Wright     ContractX1d(P_1D, Q_1D, r_U + comp, s_G, r_V + comp, scratch);
82bd882c8aSJames Wright   }
83bd882c8aSJames Wright }
84bd882c8aSJames Wright 
85bd882c8aSJames Wright //------------------------------------------------------------------------------
86bd882c8aSJames Wright // 1D derivatives transpose
87bd882c8aSJames Wright //------------------------------------------------------------------------------
88bd882c8aSJames Wright inline void GradTranspose1d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
89bd882c8aSJames Wright                             local const CeedScalar *restrict s_G, private CeedScalar *restrict r_V, local CeedScalar *restrict scratch) {
90bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
91bd882c8aSJames Wright     ContractTransposeX1d(P_1D, Q_1D, r_U + comp, s_G, r_V + comp, scratch);
92bd882c8aSJames Wright   }
93bd882c8aSJames Wright }
94bd882c8aSJames Wright 
95bd882c8aSJames Wright //------------------------------------------------------------------------------
96bd882c8aSJames Wright // 1D quadrature weights
97bd882c8aSJames Wright //------------------------------------------------------------------------------
98bd882c8aSJames Wright inline void Weight1d(const CeedInt Q_1D, const CeedScalar *restrict q_weight_1d, CeedScalar *restrict w) {
99bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
100bd882c8aSJames Wright   *w                      = (item_id_x < Q_1D) ? q_weight_1d[item_id_x] : 0.0;
101bd882c8aSJames Wright }
102bd882c8aSJames Wright 
103bd882c8aSJames Wright //------------------------------------------------------------------------------
104bd882c8aSJames Wright // 2D
105bd882c8aSJames Wright //------------------------------------------------------------------------------
106bd882c8aSJames Wright 
107bd882c8aSJames Wright //------------------------------------------------------------------------------
108bd882c8aSJames Wright // 2D tensor contraction x
109bd882c8aSJames Wright //------------------------------------------------------------------------------
110bd882c8aSJames Wright inline void ContractX2d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
111bd882c8aSJames Wright                         private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
112bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
113bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
114bd882c8aSJames Wright 
115bd882c8aSJames Wright   scratch[item_id_x + item_id_y * T_1D] = *U;
116bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
117bd882c8aSJames Wright 
118bd882c8aSJames Wright   *V = 0.0;
119bd882c8aSJames Wright   if (item_id_x < Q_1D && item_id_y < P_1D) {
120bd882c8aSJames Wright     for (CeedInt i = 0; i < P_1D; i++) {
121bd882c8aSJames Wright       *V += B[i + item_id_x * P_1D] * scratch[i + item_id_y * T_1D];  // Contract x direction
122bd882c8aSJames Wright     }
123bd882c8aSJames Wright   }
124bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
125bd882c8aSJames Wright }
126bd882c8aSJames Wright 
127bd882c8aSJames Wright //------------------------------------------------------------------------------
128bd882c8aSJames Wright // 2D tensor contract y
129bd882c8aSJames Wright //------------------------------------------------------------------------------
130bd882c8aSJames Wright inline void ContractY2d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
131bd882c8aSJames Wright                         private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
132bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
133bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
134bd882c8aSJames Wright 
135bd882c8aSJames Wright   scratch[item_id_x + item_id_y * T_1D] = *U;
136bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
137bd882c8aSJames Wright 
138bd882c8aSJames Wright   *V = 0.0;
139bd882c8aSJames Wright   if (item_id_x < Q_1D && item_id_y < Q_1D) {
140bd882c8aSJames Wright     for (CeedInt i = 0; i < P_1D; i++) {
141bd882c8aSJames Wright       *V += B[i + item_id_y * P_1D] * scratch[item_id_x + i * T_1D];  // Contract y direction
142bd882c8aSJames Wright     }
143bd882c8aSJames Wright   }
144bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
145bd882c8aSJames Wright }
146bd882c8aSJames Wright 
147bd882c8aSJames Wright //------------------------------------------------------------------------------
148bd882c8aSJames Wright // 2D transpose tensor contract y
149bd882c8aSJames Wright //------------------------------------------------------------------------------
150bd882c8aSJames Wright inline void ContractTransposeY2d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
151bd882c8aSJames Wright                                  private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
152bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
153bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
154bd882c8aSJames Wright 
155bd882c8aSJames Wright   scratch[item_id_x + item_id_y * T_1D] = *U;
156bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
157bd882c8aSJames Wright 
158bd882c8aSJames Wright   *V = 0.0;
159bd882c8aSJames Wright   if (item_id_x < Q_1D && item_id_y < P_1D) {
160bd882c8aSJames Wright     for (CeedInt i = 0; i < Q_1D; i++) {
161bd882c8aSJames Wright       *V += B[item_id_y + i * P_1D] * scratch[item_id_x + i * T_1D];  // Contract y direction
162bd882c8aSJames Wright     }
163bd882c8aSJames Wright   }
164bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
165bd882c8aSJames Wright }
166bd882c8aSJames Wright 
167bd882c8aSJames Wright //------------------------------------------------------------------------------
168bd882c8aSJames Wright // 2D transpose tensor contract x
169bd882c8aSJames Wright //------------------------------------------------------------------------------
170bd882c8aSJames Wright inline void ContractTransposeX2d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
171bd882c8aSJames Wright                                  private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
172bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
173bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
174bd882c8aSJames Wright 
175bd882c8aSJames Wright   scratch[item_id_x + item_id_y * T_1D] = *U;
176bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
177bd882c8aSJames Wright 
178bd882c8aSJames Wright   *V = 0.0;
179bd882c8aSJames Wright   if (item_id_x < P_1D && item_id_y < P_1D) {
180bd882c8aSJames Wright     for (CeedInt i = 0; i < Q_1D; i++) {
181bd882c8aSJames Wright       *V += B[item_id_x + i * P_1D] * scratch[i + item_id_y * T_1D];  // Contract x direction
182bd882c8aSJames Wright     }
183bd882c8aSJames Wright   }
184bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
185bd882c8aSJames Wright }
186bd882c8aSJames Wright 
187bd882c8aSJames Wright //------------------------------------------------------------------------------
188bd882c8aSJames Wright // 2D transpose tensor contract and add x
189bd882c8aSJames Wright //------------------------------------------------------------------------------
190bd882c8aSJames Wright inline void ContractTransposeAddX2d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
191bd882c8aSJames Wright                                     private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
192bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
193bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
194bd882c8aSJames Wright 
195bd882c8aSJames Wright   scratch[item_id_x + item_id_y * T_1D] = *U;
196bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
197bd882c8aSJames Wright 
198bd882c8aSJames Wright   if (item_id_x < P_1D && item_id_y < P_1D) {
199bd882c8aSJames Wright     for (CeedInt i = 0; i < Q_1D; i++) {
200bd882c8aSJames Wright       *V += B[item_id_x + i * P_1D] * scratch[i + item_id_y * T_1D];  // Contract x direction
201bd882c8aSJames Wright     }
202bd882c8aSJames Wright   }
203bd882c8aSJames Wright   work_group_barrier(CLK_LOCAL_MEM_FENCE);
204bd882c8aSJames Wright }
205bd882c8aSJames Wright 
206bd882c8aSJames Wright //------------------------------------------------------------------------------
207bd882c8aSJames Wright // 2D interpolate to quadrature points
208bd882c8aSJames Wright //------------------------------------------------------------------------------
209bd882c8aSJames Wright inline void InterpTensor2d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
210bd882c8aSJames Wright                            local const CeedScalar *restrict s_B, private CeedScalar *restrict r_V, local CeedScalar *restrict scratch) {
211bd882c8aSJames Wright   CeedScalar r_t[1];
212bd882c8aSJames Wright 
213bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
214bd882c8aSJames Wright     ContractX2d(P_1D, Q_1D, r_U + comp, s_B, r_t, scratch);
215bd882c8aSJames Wright     ContractY2d(P_1D, Q_1D, r_t, s_B, r_V + comp, scratch);
216bd882c8aSJames Wright   }
217bd882c8aSJames Wright }
218bd882c8aSJames Wright 
219bd882c8aSJames Wright //------------------------------------------------------------------------------
220bd882c8aSJames Wright // 2D interpolate transpose
221bd882c8aSJames Wright //------------------------------------------------------------------------------
222bd882c8aSJames Wright inline void InterpTransposeTensor2d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
223bd882c8aSJames Wright                                     local const CeedScalar *restrict s_B, private CeedScalar *restrict r_V, local CeedScalar *restrict scratch) {
224bd882c8aSJames Wright   CeedScalar r_t[1];
225bd882c8aSJames Wright 
226bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
227bd882c8aSJames Wright     ContractTransposeY2d(P_1D, Q_1D, r_U + comp, s_B, r_t, scratch);
228bd882c8aSJames Wright     ContractTransposeX2d(P_1D, Q_1D, r_t, s_B, r_V + comp, scratch);
229bd882c8aSJames Wright   }
230bd882c8aSJames Wright }
231bd882c8aSJames Wright 
232bd882c8aSJames Wright //------------------------------------------------------------------------------
233bd882c8aSJames Wright // 2D derivatives at quadrature points
234bd882c8aSJames Wright //------------------------------------------------------------------------------
235bd882c8aSJames Wright inline void GradTensor2d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
236bd882c8aSJames Wright                          local const CeedScalar *restrict s_B, local const CeedScalar *restrict s_G, private CeedScalar *restrict r_V,
237bd882c8aSJames Wright                          local CeedScalar *restrict scratch) {
238bd882c8aSJames Wright   CeedScalar r_t[1];
239bd882c8aSJames Wright 
240bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
241bd882c8aSJames Wright     ContractX2d(P_1D, Q_1D, r_U + comp, s_G, r_t, scratch);
242bd882c8aSJames Wright     ContractY2d(P_1D, Q_1D, r_t, s_B, r_V + comp + 0 * NUM_COMP, scratch);
243bd882c8aSJames Wright     ContractX2d(P_1D, Q_1D, r_U + comp, s_B, r_t, scratch);
244bd882c8aSJames Wright     ContractY2d(P_1D, Q_1D, r_t, s_G, r_V + comp + 1 * NUM_COMP, scratch);
245bd882c8aSJames Wright   }
246bd882c8aSJames Wright }
247bd882c8aSJames Wright 
248bd882c8aSJames Wright //------------------------------------------------------------------------------
249bd882c8aSJames Wright // 2D derivatives transpose
250bd882c8aSJames Wright //------------------------------------------------------------------------------
251bd882c8aSJames Wright inline void GradTransposeTensor2d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
252bd882c8aSJames Wright                                   local const CeedScalar *restrict s_B, local const CeedScalar *restrict s_G, private CeedScalar *restrict r_V,
253bd882c8aSJames Wright                                   local CeedScalar *restrict scratch) {
254bd882c8aSJames Wright   CeedScalar r_t[1];
255bd882c8aSJames Wright 
256bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
257bd882c8aSJames Wright     ContractTransposeY2d(P_1D, Q_1D, r_U + comp + 0 * NUM_COMP, s_B, r_t, scratch);
258bd882c8aSJames Wright     ContractTransposeX2d(P_1D, Q_1D, r_t, s_G, r_V + comp, scratch);
259bd882c8aSJames Wright     ContractTransposeY2d(P_1D, Q_1D, r_U + comp + 1 * NUM_COMP, s_G, r_t, scratch);
260bd882c8aSJames Wright     ContractTransposeAddX2d(P_1D, Q_1D, r_t, s_B, r_V + comp, scratch);
261bd882c8aSJames Wright   }
262bd882c8aSJames Wright }
263bd882c8aSJames Wright 
264bd882c8aSJames Wright //------------------------------------------------------------------------------
265bd882c8aSJames Wright // 2D quadrature weights
266bd882c8aSJames Wright //------------------------------------------------------------------------------
267bd882c8aSJames Wright inline void WeightTensor2d(const CeedInt Q_1D, const CeedScalar *restrict q_weight_1d, CeedScalar *restrict w) {
268bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
269bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
270bd882c8aSJames Wright 
271bd882c8aSJames Wright   *w = (item_id_x < Q_1D && item_id_y < Q_1D) ? q_weight_1d[item_id_x] * q_weight_1d[item_id_y] : 0.0;
272bd882c8aSJames Wright }
273bd882c8aSJames Wright 
274bd882c8aSJames Wright //------------------------------------------------------------------------------
275bd882c8aSJames Wright // 3D
276bd882c8aSJames Wright //------------------------------------------------------------------------------
277bd882c8aSJames Wright 
278bd882c8aSJames Wright //------------------------------------------------------------------------------
279bd882c8aSJames Wright // 3D tensor contract x
280bd882c8aSJames Wright //------------------------------------------------------------------------------
281bd882c8aSJames Wright inline void ContractX3d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
282bd882c8aSJames Wright                         private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
283bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
284bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
285bd882c8aSJames Wright 
286bd882c8aSJames Wright   CeedScalar r_B[T_1D];
287bd882c8aSJames Wright   for (CeedInt i = 0; i < P_1D; i++) {
288bd882c8aSJames Wright     r_B[i] = B[i + item_id_x * P_1D];
289bd882c8aSJames Wright   }
290bd882c8aSJames Wright 
291bd882c8aSJames Wright   for (CeedInt k = 0; k < P_1D; k++) {
292bd882c8aSJames Wright     scratch[item_id_x + item_id_y * T_1D] = U[k];
293bd882c8aSJames Wright     work_group_barrier(CLK_LOCAL_MEM_FENCE);
294bd882c8aSJames Wright 
295bd882c8aSJames Wright     V[k] = 0.0;
296bd882c8aSJames Wright     if (item_id_x < Q_1D && item_id_y < P_1D) {
297bd882c8aSJames Wright       for (CeedInt i = 0; i < P_1D; i++) {
298bd882c8aSJames Wright         V[k] += r_B[i] * scratch[i + item_id_y * T_1D];  // Contract x direction
299bd882c8aSJames Wright       }
300bd882c8aSJames Wright     }
301bd882c8aSJames Wright     work_group_barrier(CLK_LOCAL_MEM_FENCE);
302bd882c8aSJames Wright   }
303bd882c8aSJames Wright }
304bd882c8aSJames Wright 
305bd882c8aSJames Wright //------------------------------------------------------------------------------
306bd882c8aSJames Wright // 3D tensor contract y
307bd882c8aSJames Wright //------------------------------------------------------------------------------
308bd882c8aSJames Wright inline void ContractY3d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
309bd882c8aSJames Wright                         private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
310bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
311bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
312bd882c8aSJames Wright 
313bd882c8aSJames Wright   CeedScalar r_B[T_1D];
314bd882c8aSJames Wright   for (CeedInt i = 0; i < P_1D; i++) {
315bd882c8aSJames Wright     r_B[i] = B[i + item_id_y * P_1D];
316bd882c8aSJames Wright   }
317bd882c8aSJames Wright 
318bd882c8aSJames Wright   for (CeedInt k = 0; k < P_1D; k++) {
319bd882c8aSJames Wright     scratch[item_id_x + item_id_y * T_1D] = U[k];
320bd882c8aSJames Wright     work_group_barrier(CLK_LOCAL_MEM_FENCE);
321bd882c8aSJames Wright 
322bd882c8aSJames Wright     V[k] = 0.0;
323bd882c8aSJames Wright     if (item_id_x < Q_1D && item_id_y < Q_1D) {
324bd882c8aSJames Wright       for (CeedInt i = 0; i < P_1D; i++) {
325bd882c8aSJames Wright         V[k] += r_B[i] * scratch[item_id_x + i * T_1D];  // Contract y direction
326bd882c8aSJames Wright       }
327bd882c8aSJames Wright     }
328bd882c8aSJames Wright     work_group_barrier(CLK_LOCAL_MEM_FENCE);
329bd882c8aSJames Wright   }
330bd882c8aSJames Wright }
331bd882c8aSJames Wright 
332bd882c8aSJames Wright //------------------------------------------------------------------------------
333bd882c8aSJames Wright // 3D tensor contract z
334bd882c8aSJames Wright //------------------------------------------------------------------------------
335bd882c8aSJames Wright inline void ContractZ3d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
336bd882c8aSJames Wright                         private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
337bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
338bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
339bd882c8aSJames Wright 
340bd882c8aSJames Wright   for (CeedInt k = 0; k < Q_1D; k++) {
341bd882c8aSJames Wright     V[k] = 0.0;
342bd882c8aSJames Wright     if (item_id_x < Q_1D && item_id_y < Q_1D) {
343bd882c8aSJames Wright       for (CeedInt i = 0; i < P_1D; i++) {
344bd882c8aSJames Wright         V[k] += B[i + k * P_1D] * U[i];  // Contract z direction
345bd882c8aSJames Wright       }
346bd882c8aSJames Wright     }
347bd882c8aSJames Wright   }
348bd882c8aSJames Wright }
349bd882c8aSJames Wright 
350bd882c8aSJames Wright //------------------------------------------------------------------------------
351bd882c8aSJames Wright // 3D transpose tensor contract z
352bd882c8aSJames Wright //------------------------------------------------------------------------------
353bd882c8aSJames Wright inline void ContractTransposeZ3d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
354bd882c8aSJames Wright                                  private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
355bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
356bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
357bd882c8aSJames Wright 
358bd882c8aSJames Wright   for (CeedInt k = 0; k < P_1D; k++) {
359bd882c8aSJames Wright     V[k] = 0.0;
360bd882c8aSJames Wright     if (item_id_x < Q_1D && item_id_y < Q_1D) {
361bd882c8aSJames Wright       for (CeedInt i = 0; i < Q_1D; i++) {
362bd882c8aSJames Wright         V[k] += B[k + i * P_1D] * U[i];  // Contract z direction
363bd882c8aSJames Wright       }
364bd882c8aSJames Wright     }
365bd882c8aSJames Wright   }
366bd882c8aSJames Wright }
367bd882c8aSJames Wright 
368bd882c8aSJames Wright //------------------------------------------------------------------------------
369bd882c8aSJames Wright // 3D transpose tensor contract y
370bd882c8aSJames Wright //------------------------------------------------------------------------------
371bd882c8aSJames Wright inline void ContractTransposeY3d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
372bd882c8aSJames Wright                                  private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
373bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
374bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
375bd882c8aSJames Wright 
376bd882c8aSJames Wright   CeedScalar r_B[T_1D];
377bd882c8aSJames Wright   for (CeedInt i = 0; i < Q_1D; i++) {
378bd882c8aSJames Wright     r_B[i] = B[item_id_y + i * P_1D];
379bd882c8aSJames Wright   }
380bd882c8aSJames Wright 
381bd882c8aSJames Wright   for (CeedInt k = 0; k < P_1D; k++) {
382bd882c8aSJames Wright     scratch[item_id_x + item_id_y * T_1D] = U[k];
383bd882c8aSJames Wright     work_group_barrier(CLK_LOCAL_MEM_FENCE);
384bd882c8aSJames Wright 
385bd882c8aSJames Wright     V[k] = 0.0;
386bd882c8aSJames Wright     if (item_id_x < Q_1D && item_id_y < P_1D) {
387bd882c8aSJames Wright       for (CeedInt i = 0; i < Q_1D; i++) {
388bd882c8aSJames Wright         V[k] += r_B[i] * scratch[item_id_x + i * T_1D];  // Contract y direction
389bd882c8aSJames Wright       }
390bd882c8aSJames Wright     }
391bd882c8aSJames Wright     work_group_barrier(CLK_LOCAL_MEM_FENCE);
392bd882c8aSJames Wright   }
393bd882c8aSJames Wright }
394bd882c8aSJames Wright 
395bd882c8aSJames Wright //------------------------------------------------------------------------------
396bd882c8aSJames Wright // 3D transpose tensor contract y
397bd882c8aSJames Wright //------------------------------------------------------------------------------
398bd882c8aSJames Wright inline void ContractTransposeAddY3d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
399bd882c8aSJames Wright                                     private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
400bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
401bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
402bd882c8aSJames Wright 
403bd882c8aSJames Wright   CeedScalar r_B[T_1D];
404bd882c8aSJames Wright   for (CeedInt i = 0; i < Q_1D; i++) {
405bd882c8aSJames Wright     r_B[i] = B[item_id_y + i * P_1D];
406bd882c8aSJames Wright   }
407bd882c8aSJames Wright 
408bd882c8aSJames Wright   for (CeedInt k = 0; k < P_1D; k++) {
409bd882c8aSJames Wright     scratch[item_id_x + item_id_y * T_1D] = U[k];
410bd882c8aSJames Wright     work_group_barrier(CLK_LOCAL_MEM_FENCE);
411bd882c8aSJames Wright     if (item_id_x < Q_1D && item_id_y < P_1D) {
412bd882c8aSJames Wright       for (CeedInt i = 0; i < Q_1D; i++) {
413bd882c8aSJames Wright         V[k] += r_B[i] * scratch[item_id_x + i * T_1D];  // Contract y direction
414bd882c8aSJames Wright       }
415bd882c8aSJames Wright     }
416bd882c8aSJames Wright     work_group_barrier(CLK_LOCAL_MEM_FENCE);
417bd882c8aSJames Wright   }
418bd882c8aSJames Wright }
419bd882c8aSJames Wright 
420bd882c8aSJames Wright //------------------------------------------------------------------------------
421bd882c8aSJames Wright // 3D transpose tensor contract x
422bd882c8aSJames Wright //------------------------------------------------------------------------------
423bd882c8aSJames Wright inline void ContractTransposeX3d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
424bd882c8aSJames Wright                                  private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
425bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
426bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
427bd882c8aSJames Wright 
428bd882c8aSJames Wright   CeedScalar r_B[T_1D];
429bd882c8aSJames Wright   for (CeedInt i = 0; i < Q_1D; i++) {
430bd882c8aSJames Wright     r_B[i] = B[item_id_x + i * P_1D];
431bd882c8aSJames Wright   }
432bd882c8aSJames Wright 
433bd882c8aSJames Wright   for (CeedInt k = 0; k < P_1D; k++) {
434bd882c8aSJames Wright     scratch[item_id_x + item_id_y * T_1D] = U[k];
435bd882c8aSJames Wright     work_group_barrier(CLK_LOCAL_MEM_FENCE);
436bd882c8aSJames Wright     V[k] = 0.0;
437bd882c8aSJames Wright     if (item_id_x < P_1D && item_id_y < P_1D) {
438bd882c8aSJames Wright       for (CeedInt i = 0; i < Q_1D; i++) {
439bd882c8aSJames Wright         V[k] += r_B[i] * scratch[i + item_id_y * T_1D];  // Contract x direction
440bd882c8aSJames Wright       }
441bd882c8aSJames Wright     }
442bd882c8aSJames Wright     work_group_barrier(CLK_LOCAL_MEM_FENCE);
443bd882c8aSJames Wright   }
444bd882c8aSJames Wright }
445bd882c8aSJames Wright 
446bd882c8aSJames Wright //------------------------------------------------------------------------------
447bd882c8aSJames Wright // 3D transpose tensor contract add x
448bd882c8aSJames Wright //------------------------------------------------------------------------------
449bd882c8aSJames Wright inline void ContractTransposeAddX3d(const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict U, local const CeedScalar *restrict B,
450bd882c8aSJames Wright                                     private CeedScalar *restrict V, local CeedScalar *restrict scratch) {
451bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
452bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
453bd882c8aSJames Wright 
454bd882c8aSJames Wright   CeedScalar r_B[T_1D];
455bd882c8aSJames Wright   for (CeedInt i = 0; i < Q_1D; i++) {
456bd882c8aSJames Wright     r_B[i] = B[item_id_x + i * P_1D];
457bd882c8aSJames Wright   }
458bd882c8aSJames Wright 
459bd882c8aSJames Wright   for (CeedInt k = 0; k < P_1D; k++) {
460bd882c8aSJames Wright     scratch[item_id_x + item_id_y * T_1D] = U[k];
461bd882c8aSJames Wright     work_group_barrier(CLK_LOCAL_MEM_FENCE);
462bd882c8aSJames Wright 
463bd882c8aSJames Wright     if (item_id_x < P_1D && item_id_y < P_1D) {
464bd882c8aSJames Wright       for (CeedInt i = 0; i < Q_1D; i++) {
465bd882c8aSJames Wright         V[k] += r_B[i] * scratch[i + item_id_y * T_1D];  // Contract x direction
466bd882c8aSJames Wright       }
467bd882c8aSJames Wright     }
468bd882c8aSJames Wright     work_group_barrier(CLK_LOCAL_MEM_FENCE);
469bd882c8aSJames Wright   }
470bd882c8aSJames Wright }
471bd882c8aSJames Wright 
472bd882c8aSJames Wright //------------------------------------------------------------------------------
473bd882c8aSJames Wright // 3D interpolate to quadrature points
474bd882c8aSJames Wright //------------------------------------------------------------------------------
475bd882c8aSJames Wright inline void InterpTensor3d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
476bd882c8aSJames Wright                            local const CeedScalar *restrict s_B, private CeedScalar *restrict r_V, local CeedScalar *restrict scratch) {
477bd882c8aSJames Wright   CeedScalar r_t1[T_1D];
478bd882c8aSJames Wright   CeedScalar r_t2[T_1D];
479bd882c8aSJames Wright 
480bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
481bd882c8aSJames Wright     ContractX3d(P_1D, Q_1D, r_U + comp * P_1D, s_B, r_t1, scratch);
482bd882c8aSJames Wright     ContractY3d(P_1D, Q_1D, r_t1, s_B, r_t2, scratch);
483bd882c8aSJames Wright     ContractZ3d(P_1D, Q_1D, r_t2, s_B, r_V + comp * Q_1D, scratch);
484bd882c8aSJames Wright   }
485bd882c8aSJames Wright }
486bd882c8aSJames Wright 
487bd882c8aSJames Wright //------------------------------------------------------------------------------
488bd882c8aSJames Wright // 3D interpolate transpose
489bd882c8aSJames Wright //------------------------------------------------------------------------------
490bd882c8aSJames Wright inline void InterpTransposeTensor3d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
491bd882c8aSJames Wright                                     local const CeedScalar *restrict s_B, private CeedScalar *restrict r_V, local CeedScalar *restrict scratch) {
492bd882c8aSJames Wright   CeedScalar r_t1[T_1D];
493bd882c8aSJames Wright   CeedScalar r_t2[T_1D];
494bd882c8aSJames Wright 
495bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
496bd882c8aSJames Wright     ContractTransposeZ3d(P_1D, Q_1D, r_U + comp * Q_1D, s_B, r_t1, scratch);
497bd882c8aSJames Wright     ContractTransposeY3d(P_1D, Q_1D, r_t1, s_B, r_t2, scratch);
498bd882c8aSJames Wright     ContractTransposeX3d(P_1D, Q_1D, r_t2, s_B, r_V + comp * P_1D, scratch);
499bd882c8aSJames Wright   }
500bd882c8aSJames Wright }
501bd882c8aSJames Wright 
502bd882c8aSJames Wright //------------------------------------------------------------------------------
503bd882c8aSJames Wright // 3D derivatives at quadrature points
504bd882c8aSJames Wright //------------------------------------------------------------------------------
505bd882c8aSJames Wright inline void GradTensor3d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
506bd882c8aSJames Wright                          local const CeedScalar *restrict s_B, local const CeedScalar *restrict s_G, private CeedScalar *restrict r_V,
507bd882c8aSJames Wright                          local CeedScalar *restrict scratch) {
508bd882c8aSJames Wright   CeedScalar r_t1[T_1D];
509bd882c8aSJames Wright   CeedScalar r_t2[T_1D];
510bd882c8aSJames Wright 
511bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
512bd882c8aSJames Wright     ContractX3d(P_1D, Q_1D, r_U + comp * P_1D, s_G, r_t1, scratch);
513bd882c8aSJames Wright     ContractY3d(P_1D, Q_1D, r_t1, s_B, r_t2, scratch);
514bd882c8aSJames Wright     ContractZ3d(P_1D, Q_1D, r_t2, s_B, r_V + comp * Q_1D + 0 * NUM_COMP * Q_1D, scratch);
515bd882c8aSJames Wright     ContractX3d(P_1D, Q_1D, r_U + comp * P_1D, s_B, r_t1, scratch);
516bd882c8aSJames Wright     ContractY3d(P_1D, Q_1D, r_t1, s_G, r_t2, scratch);
517bd882c8aSJames Wright     ContractZ3d(P_1D, Q_1D, r_t2, s_B, r_V + comp * Q_1D + 1 * NUM_COMP * Q_1D, scratch);
518bd882c8aSJames Wright     ContractX3d(P_1D, Q_1D, r_U + comp * P_1D, s_B, r_t1, scratch);
519bd882c8aSJames Wright     ContractY3d(P_1D, Q_1D, r_t1, s_B, r_t2, scratch);
520bd882c8aSJames Wright     ContractZ3d(P_1D, Q_1D, r_t2, s_G, r_V + comp * Q_1D + 2 * NUM_COMP * Q_1D, scratch);
521bd882c8aSJames Wright   }
522bd882c8aSJames Wright }
523bd882c8aSJames Wright 
524bd882c8aSJames Wright //------------------------------------------------------------------------------
525bd882c8aSJames Wright // 3D derivatives transpose
526bd882c8aSJames Wright //------------------------------------------------------------------------------
527bd882c8aSJames Wright inline void GradTransposeTensor3d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
528bd882c8aSJames Wright                                   local const CeedScalar *restrict s_B, local const CeedScalar *restrict s_G, private CeedScalar *restrict r_V,
529bd882c8aSJames Wright                                   local CeedScalar *restrict scratch) {
530bd882c8aSJames Wright   CeedScalar r_t1[T_1D];
531bd882c8aSJames Wright   CeedScalar r_t2[T_1D];
532bd882c8aSJames Wright 
533bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
534bd882c8aSJames Wright     ContractTransposeZ3d(P_1D, Q_1D, r_U + comp * Q_1D + 0 * NUM_COMP * Q_1D, s_B, r_t1, scratch);
535bd882c8aSJames Wright     ContractTransposeY3d(P_1D, Q_1D, r_t1, s_B, r_t2, scratch);
536bd882c8aSJames Wright     ContractTransposeX3d(P_1D, Q_1D, r_t2, s_G, r_V + comp * P_1D, scratch);
537bd882c8aSJames Wright     ContractTransposeZ3d(P_1D, Q_1D, r_U + comp * Q_1D + 1 * NUM_COMP * Q_1D, s_B, r_t1, scratch);
538bd882c8aSJames Wright     ContractTransposeY3d(P_1D, Q_1D, r_t1, s_G, r_t2, scratch);
539bd882c8aSJames Wright     ContractTransposeAddX3d(P_1D, Q_1D, r_t2, s_B, r_V + comp * P_1D, scratch);
540bd882c8aSJames Wright     ContractTransposeZ3d(P_1D, Q_1D, r_U + comp * Q_1D + 2 * NUM_COMP * Q_1D, s_G, r_t1, scratch);
541bd882c8aSJames Wright     ContractTransposeY3d(P_1D, Q_1D, r_t1, s_B, r_t2, scratch);
542bd882c8aSJames Wright     ContractTransposeAddX3d(P_1D, Q_1D, r_t2, s_B, r_V + comp * P_1D, scratch);
543bd882c8aSJames Wright   }
544bd882c8aSJames Wright }
545bd882c8aSJames Wright 
546bd882c8aSJames Wright //------------------------------------------------------------------------------
547bd882c8aSJames Wright // 3D derivatives at quadrature points
548bd882c8aSJames Wright //------------------------------------------------------------------------------
549bd882c8aSJames Wright inline void GradTensorCollocated3d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
550bd882c8aSJames Wright                                    local const CeedScalar *restrict s_B, local const CeedScalar *restrict s_G, private CeedScalar *restrict r_V,
551bd882c8aSJames Wright                                    local CeedScalar *restrict scratch) {
552bd882c8aSJames Wright   CeedScalar r_t1[T_1D];
553bd882c8aSJames Wright   CeedScalar r_t2[T_1D];
554bd882c8aSJames Wright 
555bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
556bd882c8aSJames Wright     ContractX3d(P_1D, Q_1D, r_U + comp * P_1D, s_B, r_t1, scratch);
557bd882c8aSJames Wright     ContractY3d(P_1D, Q_1D, r_t1, s_B, r_t2, scratch);
558bd882c8aSJames Wright     ContractZ3d(P_1D, Q_1D, r_t2, s_B, r_t1, scratch);
559bd882c8aSJames Wright     ContractX3d(Q_1D, Q_1D, r_t1, s_G, r_V + comp * Q_1D + 0 * NUM_COMP * Q_1D, scratch);
560bd882c8aSJames Wright     ContractY3d(Q_1D, Q_1D, r_t1, s_G, r_V + comp * Q_1D + 1 * NUM_COMP * Q_1D, scratch);
561bd882c8aSJames Wright     ContractZ3d(Q_1D, Q_1D, r_t1, s_G, r_V + comp * Q_1D + 2 * NUM_COMP * Q_1D, scratch);
562bd882c8aSJames Wright   }
563bd882c8aSJames Wright }
564bd882c8aSJames Wright 
565bd882c8aSJames Wright //------------------------------------------------------------------------------
566bd882c8aSJames Wright // 3D derivatives transpose
567bd882c8aSJames Wright //------------------------------------------------------------------------------
568bd882c8aSJames Wright inline void GradTransposeTensorCollocated3d(const CeedInt NUM_COMP, const CeedInt P_1D, const CeedInt Q_1D, private const CeedScalar *restrict r_U,
569bd882c8aSJames Wright                                             local const CeedScalar *restrict s_B, local const CeedScalar *restrict s_G,
570bd882c8aSJames Wright                                             private CeedScalar *restrict r_V, local CeedScalar *restrict scratch) {
571bd882c8aSJames Wright   CeedScalar r_t1[T_1D];
572bd882c8aSJames Wright   CeedScalar r_t2[T_1D];
573bd882c8aSJames Wright 
574bd882c8aSJames Wright   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
575bd882c8aSJames Wright     ContractTransposeZ3d(Q_1D, Q_1D, r_U + comp * Q_1D + 2 * NUM_COMP * Q_1D, s_G, r_t2, scratch);
576bd882c8aSJames Wright     ContractTransposeAddY3d(Q_1D, Q_1D, r_U + comp * Q_1D + 1 * NUM_COMP * Q_1D, s_G, r_t2, scratch);
577bd882c8aSJames Wright     ContractTransposeAddX3d(Q_1D, Q_1D, r_U + comp * Q_1D + 0 * NUM_COMP * Q_1D, s_G, r_t2, scratch);
578bd882c8aSJames Wright     ContractTransposeZ3d(P_1D, Q_1D, r_t2, s_B, r_t1, scratch);
579bd882c8aSJames Wright     ContractTransposeY3d(P_1D, Q_1D, r_t1, s_B, r_t2, scratch);
580bd882c8aSJames Wright     ContractTransposeX3d(P_1D, Q_1D, r_t2, s_B, r_V + comp * P_1D, scratch);
581bd882c8aSJames Wright   }
582bd882c8aSJames Wright }
583bd882c8aSJames Wright 
584bd882c8aSJames Wright //------------------------------------------------------------------------------
585bd882c8aSJames Wright // 3D quadrature weights
586bd882c8aSJames Wright //------------------------------------------------------------------------------
587bd882c8aSJames Wright // template <int Q_1D>
588bd882c8aSJames Wright inline void WeightTensor3d(const CeedInt Q_1D, const CeedScalar *restrict q_weight_1d, CeedScalar *restrict w) {
589bd882c8aSJames Wright   const CeedInt item_id_x = get_local_id(0);
590bd882c8aSJames Wright   const CeedInt item_id_y = get_local_id(1);
591bd882c8aSJames Wright 
592bd882c8aSJames Wright   if (item_id_x < Q_1D && item_id_y < Q_1D) {
593bd882c8aSJames Wright     const CeedScalar w_xy = q_weight_1d[item_id_x] * q_weight_1d[item_id_y];
594bd882c8aSJames Wright     for (CeedInt q = 0; q < Q_1D; ++q) w[q] = w_xy * q_weight_1d[q];
595bd882c8aSJames Wright   } else {
596bd882c8aSJames Wright     for (CeedInt q = 0; q < Q_1D; q++) w[q] = 0.0;
597bd882c8aSJames Wright   }
598bd882c8aSJames Wright }
599