xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma-basis.c (revision 9d15e85b4f78ffb2d2860753c87a3b1789cc3bb6)
13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
37f5b9731SStan Tomov //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
57f5b9731SStan Tomov //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
77f5b9731SStan Tomov 
849aac155SJeremy L Thompson #include <ceed.h>
9ec3da8bcSJed Brown #include <ceed/backend.h>
10f6af633fSnbeams #include <ceed/jit-tools.h>
11f6af633fSnbeams #include <string.h>
122b730f8bSJeremy L Thompson 
13e5f091ebSnbeams #ifdef CEED_MAGMA_USE_HIP
14f6af633fSnbeams #include "../hip/ceed-hip-common.h"
15f6af633fSnbeams #include "../hip/ceed-hip-compile.h"
16f6af633fSnbeams #else
17f6af633fSnbeams #include "../cuda/ceed-cuda-common.h"
18f6af633fSnbeams #include "../cuda/ceed-cuda-compile.h"
19f6af633fSnbeams #endif
2000fb7a04SSebastian Grimberg #include "ceed-magma-common.h"
2100fb7a04SSebastian Grimberg #include "ceed-magma.h"
227f5b9731SStan Tomov 
23940a72f1SSebastian Grimberg #include "ceed-magma-gemm-nontensor.h"
24940a72f1SSebastian Grimberg #include "ceed-magma-gemm-selector.h"
25940a72f1SSebastian Grimberg 
26940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
27940a72f1SSebastian Grimberg // Basis apply - tensor
28940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
29940a72f1SSebastian Grimberg static int CeedBasisApply_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u, CeedVector v) {
307f5b9731SStan Tomov   Ceed              ceed;
31e0582403Sabdelfattah83   Ceed_Magma       *data;
32940a72f1SSebastian Grimberg   CeedInt           dim, num_comp, num_nodes, P_1d, Q_1d, P, Q;
33940a72f1SSebastian Grimberg   const CeedScalar *d_u;
34940a72f1SSebastian Grimberg   CeedScalar       *d_v;
3538293ee6SJeremy L Thompson   CeedBasis_Magma  *impl;
3638293ee6SJeremy L Thompson 
3738293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
38940a72f1SSebastian Grimberg   CeedCallBackend(CeedGetData(ceed, &data));
39940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetData(basis, &impl));
4038293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetDimension(basis, &dim));
4138293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
42940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
4338293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d));
4438293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
45940a72f1SSebastian Grimberg   P = P_1d;
46940a72f1SSebastian Grimberg   Q = Q_1d;
4738293ee6SJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
4838293ee6SJeremy L Thompson     P = Q_1d;
4938293ee6SJeremy L Thompson     Q = P_1d;
507f5b9731SStan Tomov   }
517f5b9731SStan Tomov 
52940a72f1SSebastian Grimberg   // Read vectors
53940a72f1SSebastian Grimberg   if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
54940a72f1SSebastian Grimberg   else CeedCheck(e_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
55940a72f1SSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
56940a72f1SSebastian Grimberg 
57940a72f1SSebastian Grimberg   // Clear v for transpose operation
58940a72f1SSebastian Grimberg   if (t_mode == CEED_TRANSPOSE) {
59940a72f1SSebastian Grimberg     CeedSize length;
60940a72f1SSebastian Grimberg 
61940a72f1SSebastian Grimberg     CeedCallBackend(CeedVectorGetLength(v, &length));
62940a72f1SSebastian Grimberg     if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
63940a72f1SSebastian Grimberg       magmablas_slaset(MagmaFull, length, 1, 0.0, 0.0, (float *)d_v, length, data->queue);
64940a72f1SSebastian Grimberg     } else {
65940a72f1SSebastian Grimberg       magmablas_dlaset(MagmaFull, length, 1, 0.0, 0.0, (double *)d_v, length, data->queue);
66940a72f1SSebastian Grimberg     }
67940a72f1SSebastian Grimberg     ceed_magma_queue_sync(data->queue);
68940a72f1SSebastian Grimberg   }
69940a72f1SSebastian Grimberg 
70940a72f1SSebastian Grimberg   // Apply basis operation
71940a72f1SSebastian Grimberg   switch (e_mode) {
72940a72f1SSebastian Grimberg     case CEED_EVAL_INTERP: {
737f5b9731SStan Tomov       // Define element sizes for dofs/quad
7438293ee6SJeremy L Thompson       CeedInt elem_qpts_size = CeedIntPow(Q_1d, dim);
7538293ee6SJeremy L Thompson       CeedInt elem_dofs_size = CeedIntPow(P_1d, dim);
767f5b9731SStan Tomov 
777f5b9731SStan Tomov       // E-vector ordering -------------- Q-vector ordering
78868539c2SNatalie Beams       //  component                        component
79868539c2SNatalie Beams       //    elem                             elem
807f5b9731SStan Tomov       //       node                            node
817f5b9731SStan Tomov 
827f5b9731SStan Tomov       // ---  Define strides for NOTRANSPOSE mode: ---
83940a72f1SSebastian Grimberg       // Input (d_u) is E-vector, output (d_v) is Q-vector
847f5b9731SStan Tomov 
857f5b9731SStan Tomov       // Element strides
8638293ee6SJeremy L Thompson       CeedInt u_elem_stride = elem_dofs_size;
8738293ee6SJeremy L Thompson       CeedInt v_elem_stride = elem_qpts_size;
887f5b9731SStan Tomov       // Component strides
8938293ee6SJeremy L Thompson       CeedInt u_comp_stride = num_elem * elem_dofs_size;
9038293ee6SJeremy L Thompson       CeedInt v_comp_stride = num_elem * elem_qpts_size;
9138293ee6SJeremy L Thompson       if (t_mode == CEED_TRANSPOSE) {
92940a72f1SSebastian Grimberg         // Input (d_u) is Q-vector, output (d_v) is E-vector
937f5b9731SStan Tomov         // Element strides
9438293ee6SJeremy L Thompson         v_elem_stride = elem_dofs_size;
9538293ee6SJeremy L Thompson         u_elem_stride = elem_qpts_size;
967f5b9731SStan Tomov         // Component strides
9738293ee6SJeremy L Thompson         v_comp_stride = num_elem * elem_dofs_size;
9838293ee6SJeremy L Thompson         u_comp_stride = num_elem * elem_qpts_size;
997f5b9731SStan Tomov       }
10038293ee6SJeremy L Thompson       CeedInt num_threads = 1;
10138293ee6SJeremy L Thompson       CeedInt num_t_col   = 1;
10238293ee6SJeremy L Thompson       CeedInt shared_mem  = 0;
10338293ee6SJeremy L Thompson       CeedInt max_P_Q     = CeedIntMax(P, Q);
104f6af633fSnbeams 
105f6af633fSnbeams       switch (dim) {
106f6af633fSnbeams         case 1:
10738293ee6SJeremy L Thompson           num_threads = max_P_Q;
10838293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D);
10938293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * num_t_col * (num_comp * (1 * P + 1 * Q));
11038293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * (P * Q);
111f6af633fSnbeams           break;
112f6af633fSnbeams         case 2:
11338293ee6SJeremy L Thompson           num_threads = max_P_Q;
11438293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D);
11538293ee6SJeremy L Thompson           shared_mem += P * Q * sizeof(CeedScalar);  // for sT
116940a72f1SSebastian Grimberg           // for reforming rU we need P x P, and for the intermediate output we need P x Q
117940a72f1SSebastian Grimberg           shared_mem += num_t_col * (P * max_P_Q * sizeof(CeedScalar));
118f6af633fSnbeams           break;
119f6af633fSnbeams         case 3:
12038293ee6SJeremy L Thompson           num_threads = max_P_Q * max_P_Q;
12138293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D);
12238293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * (P * Q);  // for sT
123940a72f1SSebastian Grimberg           // rU needs P^2 x P, the intermediate output needs max(P^2 x Q, P x Q^2)
124940a72f1SSebastian Grimberg           shared_mem += sizeof(CeedScalar) * num_t_col * (CeedIntMax(P * P * max_P_Q, P * Q * Q));
125940a72f1SSebastian Grimberg           break;
126f6af633fSnbeams       }
127940a72f1SSebastian Grimberg       CeedInt grid   = CeedDivUpInt(num_elem, num_t_col);
128940a72f1SSebastian Grimberg       void   *args[] = {&impl->d_interp_1d, &d_u, &u_elem_stride, &u_comp_stride, &d_v, &v_elem_stride, &v_comp_stride, &num_elem};
129f6af633fSnbeams 
13038293ee6SJeremy L Thompson       if (t_mode == CEED_TRANSPOSE) {
131940a72f1SSebastian Grimberg         CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->InterpTranspose, grid, num_threads, num_t_col, 1, shared_mem, args));
132f6af633fSnbeams       } else {
133940a72f1SSebastian Grimberg         CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Interp, grid, num_threads, num_t_col, 1, shared_mem, args));
134f6af633fSnbeams       }
1352b730f8bSJeremy L Thompson     } break;
1363513a710Sjeremylt     case CEED_EVAL_GRAD: {
1377f5b9731SStan Tomov       // Define element sizes for dofs/quad
13838293ee6SJeremy L Thompson       CeedInt elem_qpts_size = CeedIntPow(Q_1d, dim);
13938293ee6SJeremy L Thompson       CeedInt elem_dofs_size = CeedIntPow(P_1d, dim);
1407f5b9731SStan Tomov 
141940a72f1SSebastian Grimberg       // In CEED_NOTRANSPOSE mode:
142940a72f1SSebastian Grimberg       // d_u is (P^dim x nc), column-major layout (nc = num_comp)
143940a72f1SSebastian Grimberg       // d_v is (Q^dim x nc x dim), column-major layout (nc = num_comp)
144940a72f1SSebastian Grimberg       // In CEED_TRANSPOSE mode, the sizes of d_u and d_v are switched.
145940a72f1SSebastian Grimberg 
1467f5b9731SStan Tomov       // E-vector ordering -------------- Q-vector ordering
1477f5b9731SStan Tomov       //                                  dim
148868539c2SNatalie Beams       //  component                        component
149868539c2SNatalie Beams       //    elem                              elem
1507f5b9731SStan Tomov       //       node                            node
1517f5b9731SStan Tomov 
1527f5b9731SStan Tomov       // ---  Define strides for NOTRANSPOSE mode: ---
153940a72f1SSebastian Grimberg       // Input (d_u) is E-vector, output (d_v) is Q-vector
1547f5b9731SStan Tomov 
1557f5b9731SStan Tomov       // Element strides
15638293ee6SJeremy L Thompson       CeedInt u_elem_stride = elem_dofs_size;
15738293ee6SJeremy L Thompson       CeedInt v_elem_stride = elem_qpts_size;
1587f5b9731SStan Tomov       // Component strides
15938293ee6SJeremy L Thompson       CeedInt u_comp_stride = num_elem * elem_dofs_size;
16038293ee6SJeremy L Thompson       CeedInt v_comp_stride = num_elem * elem_qpts_size;
1617f5b9731SStan Tomov       // Dimension strides
16238293ee6SJeremy L Thompson       CeedInt u_dim_stride = 0;
16338293ee6SJeremy L Thompson       CeedInt v_dim_stride = num_elem * elem_qpts_size * num_comp;
16438293ee6SJeremy L Thompson       if (t_mode == CEED_TRANSPOSE) {
165940a72f1SSebastian Grimberg         // Input (d_u) is Q-vector, output (d_v) is E-vector
1667f5b9731SStan Tomov         // Element strides
16738293ee6SJeremy L Thompson         v_elem_stride = elem_dofs_size;
16838293ee6SJeremy L Thompson         u_elem_stride = elem_qpts_size;
1697f5b9731SStan Tomov         // Component strides
17038293ee6SJeremy L Thompson         v_comp_stride = num_elem * elem_dofs_size;
17138293ee6SJeremy L Thompson         u_comp_stride = num_elem * elem_qpts_size;
1727f5b9731SStan Tomov         // Dimension strides
17338293ee6SJeremy L Thompson         v_dim_stride = 0;
17438293ee6SJeremy L Thompson         u_dim_stride = num_elem * elem_qpts_size * num_comp;
1757f5b9731SStan Tomov       }
17638293ee6SJeremy L Thompson       CeedInt num_threads = 1;
17738293ee6SJeremy L Thompson       CeedInt num_t_col   = 1;
17838293ee6SJeremy L Thompson       CeedInt shared_mem  = 0;
17938293ee6SJeremy L Thompson       CeedInt max_P_Q     = CeedIntMax(P, Q);
180f6af633fSnbeams 
181f6af633fSnbeams       switch (dim) {
182f6af633fSnbeams         case 1:
18338293ee6SJeremy L Thompson           num_threads = max_P_Q;
18438293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D);
18538293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * num_t_col * (num_comp * (1 * P + 1 * Q));
18638293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * (P * Q);
187f6af633fSnbeams           break;
188f6af633fSnbeams         case 2:
18938293ee6SJeremy L Thompson           num_threads = max_P_Q;
19038293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D);
19138293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * 2 * P * Q;  // for sTinterp and sTgrad
192940a72f1SSebastian Grimberg           // for reforming rU we need P x P, and for the intermediate output we need P x Q
193940a72f1SSebastian Grimberg           shared_mem += sizeof(CeedScalar) * num_t_col * (P * max_P_Q);
194f6af633fSnbeams           break;
195f6af633fSnbeams         case 3:
19638293ee6SJeremy L Thompson           num_threads = max_P_Q * max_P_Q;
19738293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D);
19838293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * 2 * P * Q;  // for sTinterp and sTgrad
199940a72f1SSebastian Grimberg           // rU needs P^2 x P, the intermediate outputs need (P^2 x Q + P x Q^2)
200940a72f1SSebastian Grimberg           shared_mem += sizeof(CeedScalar) * num_t_col * CeedIntMax(P * P * P, (P * P * Q) + (P * Q * Q));
201940a72f1SSebastian Grimberg           break;
202f6af633fSnbeams       }
203940a72f1SSebastian Grimberg       CeedInt grid   = CeedDivUpInt(num_elem, num_t_col);
204940a72f1SSebastian Grimberg       void   *args[] = {&impl->d_interp_1d, &impl->d_grad_1d, &d_u,          &u_elem_stride, &u_comp_stride, &u_dim_stride, &d_v,
20538293ee6SJeremy L Thompson                         &v_elem_stride,     &v_comp_stride,   &v_dim_stride, &num_elem};
206f6af633fSnbeams 
20738293ee6SJeremy L Thompson       if (t_mode == CEED_TRANSPOSE) {
208940a72f1SSebastian Grimberg         CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->GradTranspose, grid, num_threads, num_t_col, 1, shared_mem, args));
209f6af633fSnbeams       } else {
210940a72f1SSebastian Grimberg         CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Grad, grid, num_threads, num_t_col, 1, shared_mem, args));
211f6af633fSnbeams       }
2122b730f8bSJeremy L Thompson     } break;
2133513a710Sjeremylt     case CEED_EVAL_WEIGHT: {
214940a72f1SSebastian Grimberg       CeedCheck(t_mode != CEED_TRANSPOSE, ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
21538293ee6SJeremy L Thompson       CeedInt elem_dofs_size = CeedIntPow(Q, dim);
21638293ee6SJeremy L Thompson       CeedInt num_threads    = 1;
21738293ee6SJeremy L Thompson       CeedInt num_t_col      = 1;
21838293ee6SJeremy L Thompson       CeedInt shared_mem     = 0;
219f6af633fSnbeams 
220f6af633fSnbeams       switch (dim) {
221f6af633fSnbeams         case 1:
22238293ee6SJeremy L Thompson           num_threads = Q;
22338293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D);
22438293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * Q;              // for d_q_weight_1d
22538293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * num_t_col * Q;  // for output
226f6af633fSnbeams           break;
227f6af633fSnbeams         case 2:
22838293ee6SJeremy L Thompson           num_threads = Q;
22938293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D);
23038293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * Q;  // for d_q_weight_1d
231f6af633fSnbeams           break;
232f6af633fSnbeams         case 3:
23338293ee6SJeremy L Thompson           num_threads = Q * Q;
23438293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D);
23538293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * Q;  // for d_q_weight_1d
236940a72f1SSebastian Grimberg           break;
237f6af633fSnbeams       }
238940a72f1SSebastian Grimberg       CeedInt grid   = CeedDivUpInt(num_elem, num_t_col);
239940a72f1SSebastian Grimberg       void   *args[] = {&impl->d_q_weight_1d, &d_v, &elem_dofs_size, &num_elem};
240f6af633fSnbeams 
241940a72f1SSebastian Grimberg       CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Weight, grid, num_threads, num_t_col, 1, shared_mem, args));
2422b730f8bSJeremy L Thompson     } break;
2433513a710Sjeremylt     // LCOV_EXCL_START
2443513a710Sjeremylt     case CEED_EVAL_DIV:
245e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported");
2463513a710Sjeremylt     case CEED_EVAL_CURL:
247e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported");
2483513a710Sjeremylt     case CEED_EVAL_NONE:
2492b730f8bSJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_NONE does not make sense in this context");
2503513a710Sjeremylt       // LCOV_EXCL_STOP
2513513a710Sjeremylt   }
2527f5b9731SStan Tomov 
253940a72f1SSebastian Grimberg   // Must sync to ensure completeness
254e0582403Sabdelfattah83   ceed_magma_queue_sync(data->queue);
255e0582403Sabdelfattah83 
256940a72f1SSebastian Grimberg   // Restore vectors
25738293ee6SJeremy L Thompson   if (e_mode != CEED_EVAL_WEIGHT) {
258940a72f1SSebastian Grimberg     CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
2597f5b9731SStan Tomov   }
260940a72f1SSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
261e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2627f5b9731SStan Tomov }
2637f5b9731SStan Tomov 
264940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
265940a72f1SSebastian Grimberg // Basis apply - non-tensor
266940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
267940a72f1SSebastian Grimberg static int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u,
268940a72f1SSebastian Grimberg                                          CeedVector v) {
269868539c2SNatalie Beams   Ceed                      ceed;
270e0582403Sabdelfattah83   Ceed_Magma               *data;
271*9d15e85bSSebastian Grimberg   CeedInt                   num_comp, q_comp, num_nodes, num_qpts, P, Q, N;
272940a72f1SSebastian Grimberg   const CeedScalar         *d_u;
273940a72f1SSebastian Grimberg   CeedScalar               *d_v;
27438293ee6SJeremy L Thompson   CeedBasisNonTensor_Magma *impl;
27538293ee6SJeremy L Thompson 
27638293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
27738293ee6SJeremy L Thompson   CeedCallBackend(CeedGetData(ceed, &data));
278940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetData(basis, &impl));
27938293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
280*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, e_mode, &q_comp));
281940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
28238293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
283940a72f1SSebastian Grimberg   P = num_nodes;
284940a72f1SSebastian Grimberg   Q = num_qpts;
285940a72f1SSebastian Grimberg   N = num_elem * num_comp;
28638293ee6SJeremy L Thompson 
287940a72f1SSebastian Grimberg   // Read vectors
288940a72f1SSebastian Grimberg   if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
28938293ee6SJeremy L Thompson   else CeedCheck(e_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
290940a72f1SSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
291868539c2SNatalie Beams 
292940a72f1SSebastian Grimberg   // Clear v for transpose operation
29338293ee6SJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
2941f9221feSJeremy L Thompson     CeedSize length;
29538293ee6SJeremy L Thompson 
296940a72f1SSebastian Grimberg     CeedCallBackend(CeedVectorGetLength(v, &length));
29780a9ef05SNatalie Beams     if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
298940a72f1SSebastian Grimberg       magmablas_slaset(MagmaFull, length, 1, 0.0, 0.0, (float *)d_v, length, data->queue);
29980a9ef05SNatalie Beams     } else {
300940a72f1SSebastian Grimberg       magmablas_dlaset(MagmaFull, length, 1, 0.0, 0.0, (double *)d_v, length, data->queue);
30180a9ef05SNatalie Beams     }
302e0582403Sabdelfattah83     ceed_magma_queue_sync(data->queue);
303868539c2SNatalie Beams   }
30480a9ef05SNatalie Beams 
305940a72f1SSebastian Grimberg   // Apply basis operation
306940a72f1SSebastian Grimberg   if (e_mode != CEED_EVAL_WEIGHT) {
307*9d15e85bSSebastian Grimberg     const CeedScalar *d_b = NULL;
308*9d15e85bSSebastian Grimberg     switch (e_mode) {
309*9d15e85bSSebastian Grimberg       case CEED_EVAL_INTERP:
310*9d15e85bSSebastian Grimberg         d_b = impl->d_interp;
311*9d15e85bSSebastian Grimberg         break;
312*9d15e85bSSebastian Grimberg       case CEED_EVAL_GRAD:
313*9d15e85bSSebastian Grimberg         d_b = impl->d_grad;
314*9d15e85bSSebastian Grimberg         break;
315*9d15e85bSSebastian Grimberg       case CEED_EVAL_DIV:
316*9d15e85bSSebastian Grimberg         d_b = impl->d_div;
317*9d15e85bSSebastian Grimberg         break;
318*9d15e85bSSebastian Grimberg       case CEED_EVAL_CURL:
319*9d15e85bSSebastian Grimberg         d_b = impl->d_curl;
320*9d15e85bSSebastian Grimberg         break;
321*9d15e85bSSebastian Grimberg       // LCOV_EXCL_START
322*9d15e85bSSebastian Grimberg       case CEED_EVAL_WEIGHT:
323*9d15e85bSSebastian Grimberg         return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT does not make sense in this context");
324*9d15e85bSSebastian Grimberg       case CEED_EVAL_NONE:
325*9d15e85bSSebastian Grimberg         return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_NONE does not make sense in this context");
326*9d15e85bSSebastian Grimberg         // LCOV_EXCL_STOP
327*9d15e85bSSebastian Grimberg     }
328*9d15e85bSSebastian Grimberg 
329*9d15e85bSSebastian Grimberg     // Apply basis operation
330*9d15e85bSSebastian Grimberg     if (P <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P && Q <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) {
331940a72f1SSebastian Grimberg       CeedInt n_array[MAGMA_NONTENSOR_KERNEL_INSTANCES] = {MAGMA_NONTENSOR_KERNEL_N_VALUES};
332940a72f1SSebastian Grimberg       CeedInt iN = 0, diff = abs(n_array[iN] - N), idiff;
333940a72f1SSebastian Grimberg       CeedInt M = (t_mode == CEED_TRANSPOSE) ? P : Q, K = (t_mode == CEED_TRANSPOSE) ? Q : P;
33438293ee6SJeremy L Thompson 
335023b8a51Sabdelfattah83       for (CeedInt in = iN + 1; in < MAGMA_NONTENSOR_KERNEL_INSTANCES; in++) {
336940a72f1SSebastian Grimberg         idiff = abs(n_array[in] - N);
337023b8a51Sabdelfattah83         if (idiff < diff) {
338023b8a51Sabdelfattah83           iN   = in;
339023b8a51Sabdelfattah83           diff = idiff;
340868539c2SNatalie Beams         }
34180a9ef05SNatalie Beams       }
34280a9ef05SNatalie Beams 
343940a72f1SSebastian Grimberg       // Compile kernels for N as needed
344940a72f1SSebastian Grimberg       if (!impl->NB_interp[iN]) {
345*9d15e85bSSebastian Grimberg         CeedFESpace fe_space;
346*9d15e85bSSebastian Grimberg         CeedInt     q_comp_interp, q_comp_deriv;
347940a72f1SSebastian Grimberg         Ceed        ceed_delegate;
348*9d15e85bSSebastian Grimberg         char       *basis_kernel_path, *basis_kernel_source;
349940a72f1SSebastian Grimberg         magma_int_t arch = magma_getdevice_arch();
35080a9ef05SNatalie Beams 
351940a72f1SSebastian Grimberg         // Tuning parameters for NB
352*9d15e85bSSebastian Grimberg         CeedCallBackend(CeedBasisGetFESpace(basis, &fe_space));
353*9d15e85bSSebastian Grimberg         CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
354*9d15e85bSSebastian Grimberg         switch (fe_space) {
355*9d15e85bSSebastian Grimberg           case CEED_FE_SPACE_H1:
356*9d15e85bSSebastian Grimberg             CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_deriv));
357*9d15e85bSSebastian Grimberg             break;
358*9d15e85bSSebastian Grimberg           case CEED_FE_SPACE_HDIV:
359*9d15e85bSSebastian Grimberg             CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_deriv));
360*9d15e85bSSebastian Grimberg             break;
361*9d15e85bSSebastian Grimberg           case CEED_FE_SPACE_HCURL:
362*9d15e85bSSebastian Grimberg             CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_deriv));
363*9d15e85bSSebastian Grimberg             break;
364*9d15e85bSSebastian Grimberg         }
365*9d15e85bSSebastian Grimberg         impl->NB_interp[iN]   = nontensor_rtc_get_nb(arch, 'n', q_comp_interp, P, Q, n_array[iN]);
366*9d15e85bSSebastian Grimberg         impl->NB_interp_t[iN] = nontensor_rtc_get_nb(arch, 't', q_comp_interp, P, Q, n_array[iN]);
367*9d15e85bSSebastian Grimberg         impl->NB_deriv[iN]    = nontensor_rtc_get_nb(arch, 'n', q_comp_deriv, P, Q, n_array[iN]);
368*9d15e85bSSebastian Grimberg         impl->NB_deriv_t[iN]  = nontensor_rtc_get_nb(arch, 't', q_comp_deriv, P, Q, n_array[iN]);
369023b8a51Sabdelfattah83 
370940a72f1SSebastian Grimberg         // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
371940a72f1SSebastian Grimberg         CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));
372023b8a51Sabdelfattah83 
373940a72f1SSebastian Grimberg         // Compile kernels
374*9d15e85bSSebastian Grimberg         CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-interp-deriv-nontensor.h", &basis_kernel_path));
375940a72f1SSebastian Grimberg         CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
376*9d15e85bSSebastian Grimberg         CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
377940a72f1SSebastian Grimberg         CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
378*9d15e85bSSebastian Grimberg         CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module_interp[iN], 8, "BASIS_Q_COMP_INTERP", q_comp_interp,
379*9d15e85bSSebastian Grimberg                                          "BASIS_Q_COMP_DERIV", q_comp_deriv, "BASIS_P", P, "BASIS_Q", Q, "BASIS_NB_INTERP_N", impl->NB_interp[iN],
380*9d15e85bSSebastian Grimberg                                          "BASIS_NB_INTERP_T", impl->NB_interp_t[iN], "BASIS_NB_DERIV_N", impl->NB_deriv[iN], "BASIS_NB_DERIV_T",
381*9d15e85bSSebastian Grimberg                                          impl->NB_deriv_t[iN]));
382940a72f1SSebastian Grimberg         CeedCallBackend(CeedGetKernelMagma(ceed, impl->module_interp[iN], "magma_interp_nontensor_n", &impl->Interp[iN]));
383940a72f1SSebastian Grimberg         CeedCallBackend(CeedGetKernelMagma(ceed, impl->module_interp[iN], "magma_interp_nontensor_t", &impl->InterpTranspose[iN]));
384*9d15e85bSSebastian Grimberg         CeedCallBackend(CeedGetKernelMagma(ceed, impl->module_interp[iN], "magma_deriv_nontensor_n", &impl->Deriv[iN]));
385*9d15e85bSSebastian Grimberg         CeedCallBackend(CeedGetKernelMagma(ceed, impl->module_interp[iN], "magma_deriv_nontensor_t", &impl->DerivTranspose[iN]));
386*9d15e85bSSebastian Grimberg         CeedCallBackend(CeedFree(&basis_kernel_path));
387940a72f1SSebastian Grimberg         CeedCallBackend(CeedFree(&basis_kernel_source));
388940a72f1SSebastian Grimberg       }
389*9d15e85bSSebastian Grimberg       CeedMagmaFunction Kernel;
390*9d15e85bSSebastian Grimberg       CeedInt           NB;
391*9d15e85bSSebastian Grimberg       if (e_mode == CEED_EVAL_INTERP) {
392*9d15e85bSSebastian Grimberg         if (t_mode == CEED_TRANSPOSE) {
393*9d15e85bSSebastian Grimberg           Kernel = impl->InterpTranspose[iN];
394*9d15e85bSSebastian Grimberg           NB     = impl->NB_interp_t[iN];
395*9d15e85bSSebastian Grimberg         } else {
396*9d15e85bSSebastian Grimberg           Kernel = impl->Interp[iN];
397*9d15e85bSSebastian Grimberg           NB     = impl->NB_interp[iN];
398*9d15e85bSSebastian Grimberg         }
399*9d15e85bSSebastian Grimberg       } else {
400*9d15e85bSSebastian Grimberg         if (t_mode == CEED_TRANSPOSE) {
401*9d15e85bSSebastian Grimberg           Kernel = impl->DerivTranspose[iN];
402*9d15e85bSSebastian Grimberg           NB     = impl->NB_deriv_t[iN];
403*9d15e85bSSebastian Grimberg         } else {
404*9d15e85bSSebastian Grimberg           Kernel = impl->Deriv[iN];
405*9d15e85bSSebastian Grimberg           NB     = impl->NB_deriv[iN];
406*9d15e85bSSebastian Grimberg         }
407*9d15e85bSSebastian Grimberg       }
408940a72f1SSebastian Grimberg       CeedInt num_t_col    = MAGMA_BASIS_NTCOL(M, MAGMA_MAXTHREADS_1D);
409*9d15e85bSSebastian Grimberg       CeedInt grid         = CeedDivUpInt(N, num_t_col * NB);
410*9d15e85bSSebastian Grimberg       CeedInt shared_mem_A = (t_mode == CEED_TRANSPOSE) ? 0 : P * Q * sizeof(CeedScalar);
411940a72f1SSebastian Grimberg       CeedInt shared_mem_B = num_t_col * K * NB * sizeof(CeedScalar);
412*9d15e85bSSebastian Grimberg       CeedInt shared_mem   = (t_mode == CEED_TRANSPOSE || q_comp > 1) ? (shared_mem_A + shared_mem_B) : CeedIntMax(shared_mem_A, shared_mem_B);
413*9d15e85bSSebastian Grimberg       void   *args[]       = {&N, &d_b, &d_u, &d_v};
414940a72f1SSebastian Grimberg 
415*9d15e85bSSebastian Grimberg       CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, Kernel, grid, M, num_t_col, 1, shared_mem, args));
416*9d15e85bSSebastian Grimberg     } else {
417*9d15e85bSSebastian Grimberg       for (CeedInt d = 0; d < q_comp; d++) {
41838293ee6SJeremy L Thompson         if (t_mode == CEED_TRANSPOSE) {
419940a72f1SSebastian Grimberg           const CeedScalar beta = (d > 0) ? 1.0 : 0.0;
420*9d15e85bSSebastian Grimberg           magma_gemm_nontensor(MagmaNoTrans, MagmaNoTrans, P, N, Q, 1.0, d_b + d * P * Q, P, d_u + d * N * Q, Q, beta, d_v, P, data->queue);
421940a72f1SSebastian Grimberg         } else {
422*9d15e85bSSebastian Grimberg           magma_gemm_nontensor(MagmaTrans, MagmaNoTrans, Q, N, P, 1.0, d_b + d * P * Q, P, d_u, P, 0.0, d_v + d * N * Q, Q, data->queue);
423940a72f1SSebastian Grimberg         }
424940a72f1SSebastian Grimberg       }
425940a72f1SSebastian Grimberg     }
426940a72f1SSebastian Grimberg   } else {
427940a72f1SSebastian Grimberg     CeedCheck(t_mode != CEED_TRANSPOSE, ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
428940a72f1SSebastian Grimberg     CeedInt num_t_col  = MAGMA_BASIS_NTCOL(Q, MAGMA_MAXTHREADS_1D);
429940a72f1SSebastian Grimberg     CeedInt grid       = CeedDivUpInt(num_elem, num_t_col);
430940a72f1SSebastian Grimberg     CeedInt shared_mem = Q * sizeof(CeedScalar) + num_t_col * Q * sizeof(CeedScalar);
431*9d15e85bSSebastian Grimberg     void   *args[]     = {&num_elem, &impl->d_q_weight, &d_v};
432868539c2SNatalie Beams 
433940a72f1SSebastian Grimberg     CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Weight, grid, Q, num_t_col, 1, shared_mem, args));
434940a72f1SSebastian Grimberg   }
435940a72f1SSebastian Grimberg 
436940a72f1SSebastian Grimberg   // Must sync to ensure completeness
437e0582403Sabdelfattah83   ceed_magma_queue_sync(data->queue);
438e0582403Sabdelfattah83 
439940a72f1SSebastian Grimberg   // Restore vectors
44038293ee6SJeremy L Thompson   if (e_mode != CEED_EVAL_WEIGHT) {
441940a72f1SSebastian Grimberg     CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
442868539c2SNatalie Beams   }
443940a72f1SSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
444e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
445868539c2SNatalie Beams }
446868539c2SNatalie Beams 
447940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
448940a72f1SSebastian Grimberg // Destroy tensor basis
449940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
450940a72f1SSebastian Grimberg static int CeedBasisDestroy_Magma(CeedBasis basis) {
451f6af633fSnbeams   Ceed             ceed;
45238293ee6SJeremy L Thompson   CeedBasis_Magma *impl;
45338293ee6SJeremy L Thompson 
4542b730f8bSJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
455940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetData(basis, &impl));
456e5f091ebSnbeams #ifdef CEED_MAGMA_USE_HIP
4572b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipModuleUnload(impl->module));
458f6af633fSnbeams #else
4592b730f8bSJeremy L Thompson   CeedCallCuda(ceed, cuModuleUnload(impl->module));
460f6af633fSnbeams #endif
461940a72f1SSebastian Grimberg   CeedCallBackend(magma_free(impl->d_interp_1d));
462940a72f1SSebastian Grimberg   CeedCallBackend(magma_free(impl->d_grad_1d));
463940a72f1SSebastian Grimberg   CeedCallBackend(magma_free(impl->d_q_weight_1d));
4642b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl));
465e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4667f5b9731SStan Tomov }
4677f5b9731SStan Tomov 
468940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
469940a72f1SSebastian Grimberg // Destroy non-tensor basis
470940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
471940a72f1SSebastian Grimberg static int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) {
472023b8a51Sabdelfattah83   Ceed                      ceed;
47338293ee6SJeremy L Thompson   CeedBasisNonTensor_Magma *impl;
47438293ee6SJeremy L Thompson 
475940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
47638293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetData(basis, &impl));
477940a72f1SSebastian Grimberg #ifdef CEED_MAGMA_USE_HIP
478940a72f1SSebastian Grimberg   CeedCallHip(ceed, hipModuleUnload(impl->module_weight));
479940a72f1SSebastian Grimberg #else
480940a72f1SSebastian Grimberg   CeedCallCuda(ceed, cuModuleUnload(impl->module_weight));
481940a72f1SSebastian Grimberg #endif
482940a72f1SSebastian Grimberg   for (CeedInt in = 0; in < MAGMA_NONTENSOR_KERNEL_INSTANCES; in++) {
483940a72f1SSebastian Grimberg     if (impl->module_interp[in]) {
484940a72f1SSebastian Grimberg #ifdef CEED_MAGMA_USE_HIP
485940a72f1SSebastian Grimberg       CeedCallHip(ceed, hipModuleUnload(impl->module_interp[in]));
486940a72f1SSebastian Grimberg #else
487940a72f1SSebastian Grimberg       CeedCallCuda(ceed, cuModuleUnload(impl->module_interp[in]));
488940a72f1SSebastian Grimberg #endif
489940a72f1SSebastian Grimberg     }
490940a72f1SSebastian Grimberg   }
49138293ee6SJeremy L Thompson   CeedCallBackend(magma_free(impl->d_interp));
49238293ee6SJeremy L Thompson   CeedCallBackend(magma_free(impl->d_grad));
493*9d15e85bSSebastian Grimberg   CeedCallBackend(magma_free(impl->d_div));
494*9d15e85bSSebastian Grimberg   CeedCallBackend(magma_free(impl->d_curl));
49538293ee6SJeremy L Thompson   CeedCallBackend(magma_free(impl->d_q_weight));
4962b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl));
497e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
498868539c2SNatalie Beams }
499868539c2SNatalie Beams 
500940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
501940a72f1SSebastian Grimberg // Create tensor
502940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
503940a72f1SSebastian Grimberg int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
50438293ee6SJeremy L Thompson                                   const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) {
50538293ee6SJeremy L Thompson   Ceed             ceed, ceed_delegate;
50638293ee6SJeremy L Thompson   Ceed_Magma      *data;
507940a72f1SSebastian Grimberg   char            *interp_kernel_path, *grad_kernel_path, *weight_kernel_path, *basis_kernel_source;
508940a72f1SSebastian Grimberg   CeedInt          num_comp;
5097f5b9731SStan Tomov   CeedBasis_Magma *impl;
51038293ee6SJeremy L Thompson 
5112b730f8bSJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
5122b730f8bSJeremy L Thompson   CeedCallBackend(CeedGetData(ceed, &data));
513940a72f1SSebastian Grimberg   CeedCallBackend(CeedCalloc(1, &impl));
514e0582403Sabdelfattah83 
515940a72f1SSebastian Grimberg   // Copy basis data to GPU
516940a72f1SSebastian Grimberg   CeedCallBackend(magma_malloc((void **)&impl->d_q_weight_1d, Q_1d * sizeof(q_weight_1d[0])));
517940a72f1SSebastian Grimberg   magma_setvector(Q_1d, sizeof(q_weight_1d[0]), q_weight_1d, 1, impl->d_q_weight_1d, 1, data->queue);
51838293ee6SJeremy L Thompson   CeedCallBackend(magma_malloc((void **)&impl->d_interp_1d, Q_1d * P_1d * sizeof(interp_1d[0])));
51938293ee6SJeremy L Thompson   magma_setvector(Q_1d * P_1d, sizeof(interp_1d[0]), interp_1d, 1, impl->d_interp_1d, 1, data->queue);
52038293ee6SJeremy L Thompson   CeedCallBackend(magma_malloc((void **)&impl->d_grad_1d, Q_1d * P_1d * sizeof(grad_1d[0])));
52138293ee6SJeremy L Thompson   magma_setvector(Q_1d * P_1d, sizeof(grad_1d[0]), grad_1d, 1, impl->d_grad_1d, 1, data->queue);
5227f5b9731SStan Tomov 
523940a72f1SSebastian Grimberg   // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
524940a72f1SSebastian Grimberg   CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));
525940a72f1SSebastian Grimberg 
526940a72f1SSebastian Grimberg   // Compile kernels
527940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
528940a72f1SSebastian Grimberg   {
529940a72f1SSebastian Grimberg     char   *interp_kernel_name_base = "ceed/jit-source/magma/magma-basis-interp";
530940a72f1SSebastian Grimberg     CeedInt interp_kernel_name_len  = strlen(interp_kernel_name_base) + 6;
531940a72f1SSebastian Grimberg     char    interp_kernel_name[interp_kernel_name_len];
532940a72f1SSebastian Grimberg 
533940a72f1SSebastian Grimberg     snprintf(interp_kernel_name, interp_kernel_name_len, "%s-%" CeedInt_FMT "d.h", interp_kernel_name_base, dim);
534940a72f1SSebastian Grimberg     CeedCallBackend(CeedGetJitAbsolutePath(ceed, interp_kernel_name, &interp_kernel_path));
535940a72f1SSebastian Grimberg   }
536940a72f1SSebastian Grimberg   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
537940a72f1SSebastian Grimberg   CeedCallBackend(CeedLoadSourceToBuffer(ceed, interp_kernel_path, &basis_kernel_source));
538940a72f1SSebastian Grimberg   {
539940a72f1SSebastian Grimberg     char   *grad_kernel_name_base = "ceed/jit-source/magma/magma-basis-grad";
540940a72f1SSebastian Grimberg     CeedInt grad_kernel_name_len  = strlen(grad_kernel_name_base) + 6;
541940a72f1SSebastian Grimberg     char    grad_kernel_name[grad_kernel_name_len];
542940a72f1SSebastian Grimberg 
543940a72f1SSebastian Grimberg     snprintf(grad_kernel_name, grad_kernel_name_len, "%s-%" CeedInt_FMT "d.h", grad_kernel_name_base, dim);
544940a72f1SSebastian Grimberg     CeedCallBackend(CeedGetJitAbsolutePath(ceed, grad_kernel_name, &grad_kernel_path));
545940a72f1SSebastian Grimberg   }
546940a72f1SSebastian Grimberg   CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, grad_kernel_path, &basis_kernel_source));
547940a72f1SSebastian Grimberg   {
548940a72f1SSebastian Grimberg     char   *weight_kernel_name_base = "ceed/jit-source/magma/magma-basis-weight";
549940a72f1SSebastian Grimberg     CeedInt weight_kernel_name_len  = strlen(weight_kernel_name_base) + 6;
550940a72f1SSebastian Grimberg     char    weight_kernel_name[weight_kernel_name_len];
551940a72f1SSebastian Grimberg 
552940a72f1SSebastian Grimberg     snprintf(weight_kernel_name, weight_kernel_name_len, "%s-%" CeedInt_FMT "d.h", weight_kernel_name_base, dim);
553940a72f1SSebastian Grimberg     CeedCallBackend(CeedGetJitAbsolutePath(ceed, weight_kernel_name, &weight_kernel_path));
554940a72f1SSebastian Grimberg   }
555940a72f1SSebastian Grimberg   CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, weight_kernel_path, &basis_kernel_source));
556940a72f1SSebastian Grimberg   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
557940a72f1SSebastian Grimberg   CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module, 5, "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_P",
558940a72f1SSebastian Grimberg                                    P_1d, "BASIS_Q", Q_1d, "BASIS_MAX_P_Q", CeedIntMax(P_1d, Q_1d)));
559940a72f1SSebastian Grimberg   switch (dim) {
560940a72f1SSebastian Grimberg     case 1:
561940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_1d_kernel", &impl->Interp));
562940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_1d_kernel", &impl->InterpTranspose));
563940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_1d_kernel", &impl->Grad));
564940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_1d_kernel", &impl->GradTranspose));
565940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_1d_kernel", &impl->Weight));
566940a72f1SSebastian Grimberg       break;
567940a72f1SSebastian Grimberg     case 2:
568940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_2d_kernel", &impl->Interp));
569940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_2d_kernel", &impl->InterpTranspose));
570940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_2d_kernel", &impl->Grad));
571940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_2d_kernel", &impl->GradTranspose));
572940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_2d_kernel", &impl->Weight));
573940a72f1SSebastian Grimberg       break;
574940a72f1SSebastian Grimberg     case 3:
575940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_3d_kernel", &impl->Interp));
576940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_3d_kernel", &impl->InterpTranspose));
577940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_3d_kernel", &impl->Grad));
578940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_3d_kernel", &impl->GradTranspose));
579940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_3d_kernel", &impl->Weight));
580940a72f1SSebastian Grimberg       break;
581940a72f1SSebastian Grimberg   }
582940a72f1SSebastian Grimberg   CeedCallBackend(CeedFree(&interp_kernel_path));
583940a72f1SSebastian Grimberg   CeedCallBackend(CeedFree(&grad_kernel_path));
584940a72f1SSebastian Grimberg   CeedCallBackend(CeedFree(&weight_kernel_path));
585940a72f1SSebastian Grimberg   CeedCallBackend(CeedFree(&basis_kernel_source));
5867f5b9731SStan Tomov 
5872b730f8bSJeremy L Thompson   CeedCallBackend(CeedBasisSetData(basis, impl));
588940a72f1SSebastian Grimberg 
589940a72f1SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Magma));
590940a72f1SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Magma));
591e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
5927f5b9731SStan Tomov }
5937f5b9731SStan Tomov 
594940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
595940a72f1SSebastian Grimberg // Create non-tensor H^1
596940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
597940a72f1SSebastian Grimberg int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad,
59838293ee6SJeremy L Thompson                             const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
59938293ee6SJeremy L Thompson   Ceed                      ceed, ceed_delegate;
600e0582403Sabdelfattah83   Ceed_Magma               *data;
601940a72f1SSebastian Grimberg   char                     *weight_kernel_path, *basis_kernel_source;
60238293ee6SJeremy L Thompson   CeedBasisNonTensor_Magma *impl;
60338293ee6SJeremy L Thompson 
60438293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
6052b730f8bSJeremy L Thompson   CeedCallBackend(CeedGetData(ceed, &data));
6062b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &impl));
607023b8a51Sabdelfattah83 
608940a72f1SSebastian Grimberg   // Copy basis data to GPU
60938293ee6SJeremy L Thompson   CeedCallBackend(magma_malloc((void **)&impl->d_q_weight, num_qpts * sizeof(q_weight[0])));
61038293ee6SJeremy L Thompson   magma_setvector(num_qpts, sizeof(q_weight[0]), q_weight, 1, impl->d_q_weight, 1, data->queue);
611*9d15e85bSSebastian Grimberg   if (interp) {
612*9d15e85bSSebastian Grimberg     CeedInt q_comp_interp;
613*9d15e85bSSebastian Grimberg 
614*9d15e85bSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
615*9d15e85bSSebastian Grimberg     CeedCallBackend(magma_malloc((void **)&impl->d_interp, num_qpts * num_nodes * q_comp_interp * sizeof(interp[0])));
616*9d15e85bSSebastian Grimberg     magma_setvector(num_qpts * num_nodes * q_comp_interp, sizeof(interp[0]), interp, 1, impl->d_interp, 1, data->queue);
617*9d15e85bSSebastian Grimberg   }
618*9d15e85bSSebastian Grimberg   if (grad) {
619*9d15e85bSSebastian Grimberg     CeedInt q_comp_grad;
620*9d15e85bSSebastian Grimberg 
621*9d15e85bSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad));
622*9d15e85bSSebastian Grimberg     CeedCallBackend(magma_malloc((void **)&impl->d_grad, num_qpts * num_nodes * q_comp_grad * sizeof(grad[0])));
623*9d15e85bSSebastian Grimberg     magma_setvector(num_qpts * num_nodes * q_comp_grad, sizeof(grad[0]), grad, 1, impl->d_grad, 1, data->queue);
624*9d15e85bSSebastian Grimberg   }
625*9d15e85bSSebastian Grimberg 
626*9d15e85bSSebastian Grimberg   // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
627*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));
628*9d15e85bSSebastian Grimberg 
629*9d15e85bSSebastian Grimberg   // Compile weight kernel (the remainder of kernel compilation happens at first call to CeedBasisApply)
630*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path));
631*9d15e85bSSebastian Grimberg   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
632*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedLoadSourceToBuffer(ceed, weight_kernel_path, &basis_kernel_source));
633*9d15e85bSSebastian Grimberg   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
634*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module_weight, 1, "BASIS_Q", num_qpts));
635*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedGetKernelMagma(ceed, impl->module_weight, "magma_weight_nontensor", &impl->Weight));
636*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedFree(&weight_kernel_path));
637*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedFree(&basis_kernel_source));
638*9d15e85bSSebastian Grimberg 
639*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedBasisSetData(basis, impl));
640*9d15e85bSSebastian Grimberg 
641*9d15e85bSSebastian Grimberg   // Register backend functions
642*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma));
643*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma));
644*9d15e85bSSebastian Grimberg   return CEED_ERROR_SUCCESS;
645*9d15e85bSSebastian Grimberg }
646*9d15e85bSSebastian Grimberg 
647*9d15e85bSSebastian Grimberg //------------------------------------------------------------------------------
648*9d15e85bSSebastian Grimberg // Create non-tensor H(div)
649*9d15e85bSSebastian Grimberg //------------------------------------------------------------------------------
650*9d15e85bSSebastian Grimberg int CeedBasisCreateHdiv_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
651*9d15e85bSSebastian Grimberg                               const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
652*9d15e85bSSebastian Grimberg   Ceed                      ceed, ceed_delegate;
653*9d15e85bSSebastian Grimberg   Ceed_Magma               *data;
654*9d15e85bSSebastian Grimberg   char                     *weight_kernel_path, *basis_kernel_source;
655*9d15e85bSSebastian Grimberg   CeedBasisNonTensor_Magma *impl;
656*9d15e85bSSebastian Grimberg 
657*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
658*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedGetData(ceed, &data));
659*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedCalloc(1, &impl));
660*9d15e85bSSebastian Grimberg 
661*9d15e85bSSebastian Grimberg   // Copy basis data to GPU
662*9d15e85bSSebastian Grimberg   CeedCallBackend(magma_malloc((void **)&impl->d_q_weight, num_qpts * sizeof(q_weight[0])));
663*9d15e85bSSebastian Grimberg   magma_setvector(num_qpts, sizeof(q_weight[0]), q_weight, 1, impl->d_q_weight, 1, data->queue);
664*9d15e85bSSebastian Grimberg   if (interp) {
665*9d15e85bSSebastian Grimberg     CeedInt q_comp_interp;
666*9d15e85bSSebastian Grimberg 
667*9d15e85bSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
668*9d15e85bSSebastian Grimberg     CeedCallBackend(magma_malloc((void **)&impl->d_interp, num_qpts * num_nodes * q_comp_interp * sizeof(interp[0])));
669*9d15e85bSSebastian Grimberg     magma_setvector(num_qpts * num_nodes * q_comp_interp, sizeof(interp[0]), interp, 1, impl->d_interp, 1, data->queue);
670*9d15e85bSSebastian Grimberg   }
671*9d15e85bSSebastian Grimberg   if (div) {
672*9d15e85bSSebastian Grimberg     CeedInt q_comp_div;
673*9d15e85bSSebastian Grimberg 
674*9d15e85bSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_div));
675*9d15e85bSSebastian Grimberg     CeedCallBackend(magma_malloc((void **)&impl->d_div, num_qpts * num_nodes * q_comp_div * sizeof(div[0])));
676*9d15e85bSSebastian Grimberg     magma_setvector(num_qpts * num_nodes * q_comp_div, sizeof(div[0]), div, 1, impl->d_div, 1, data->queue);
677*9d15e85bSSebastian Grimberg   }
678*9d15e85bSSebastian Grimberg 
679*9d15e85bSSebastian Grimberg   // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
680*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));
681*9d15e85bSSebastian Grimberg 
682*9d15e85bSSebastian Grimberg   // Compile weight kernel (the remainder of kernel compilation happens at first call to CeedBasisApply)
683*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path));
684*9d15e85bSSebastian Grimberg   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
685*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedLoadSourceToBuffer(ceed, weight_kernel_path, &basis_kernel_source));
686*9d15e85bSSebastian Grimberg   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
687*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module_weight, 1, "BASIS_Q", num_qpts));
688*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedGetKernelMagma(ceed, impl->module_weight, "magma_weight_nontensor", &impl->Weight));
689*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedFree(&weight_kernel_path));
690*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedFree(&basis_kernel_source));
691*9d15e85bSSebastian Grimberg 
692*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedBasisSetData(basis, impl));
693*9d15e85bSSebastian Grimberg 
694*9d15e85bSSebastian Grimberg   // Register backend functions
695*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma));
696*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma));
697*9d15e85bSSebastian Grimberg   return CEED_ERROR_SUCCESS;
698*9d15e85bSSebastian Grimberg }
699*9d15e85bSSebastian Grimberg 
700*9d15e85bSSebastian Grimberg //------------------------------------------------------------------------------
701*9d15e85bSSebastian Grimberg // Create non-tensor H(curl)
702*9d15e85bSSebastian Grimberg //------------------------------------------------------------------------------
703*9d15e85bSSebastian Grimberg int CeedBasisCreateHcurl_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
704*9d15e85bSSebastian Grimberg                                const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
705*9d15e85bSSebastian Grimberg   Ceed                      ceed, ceed_delegate;
706*9d15e85bSSebastian Grimberg   Ceed_Magma               *data;
707*9d15e85bSSebastian Grimberg   char                     *weight_kernel_path, *basis_kernel_source;
708*9d15e85bSSebastian Grimberg   CeedBasisNonTensor_Magma *impl;
709*9d15e85bSSebastian Grimberg 
710*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
711*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedGetData(ceed, &data));
712*9d15e85bSSebastian Grimberg   CeedCallBackend(CeedCalloc(1, &impl));
713*9d15e85bSSebastian Grimberg 
714*9d15e85bSSebastian Grimberg   // Copy basis data to GPU
715*9d15e85bSSebastian Grimberg   CeedCallBackend(magma_malloc((void **)&impl->d_q_weight, num_qpts * sizeof(q_weight[0])));
716*9d15e85bSSebastian Grimberg   magma_setvector(num_qpts, sizeof(q_weight[0]), q_weight, 1, impl->d_q_weight, 1, data->queue);
717*9d15e85bSSebastian Grimberg   if (interp) {
718*9d15e85bSSebastian Grimberg     CeedInt q_comp_interp;
719*9d15e85bSSebastian Grimberg 
720*9d15e85bSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
721*9d15e85bSSebastian Grimberg     CeedCallBackend(magma_malloc((void **)&impl->d_interp, num_qpts * num_nodes * q_comp_interp * sizeof(interp[0])));
722*9d15e85bSSebastian Grimberg     magma_setvector(num_qpts * num_nodes * q_comp_interp, sizeof(interp[0]), interp, 1, impl->d_interp, 1, data->queue);
723*9d15e85bSSebastian Grimberg   }
724*9d15e85bSSebastian Grimberg   if (curl) {
725*9d15e85bSSebastian Grimberg     CeedInt q_comp_curl;
726*9d15e85bSSebastian Grimberg 
727*9d15e85bSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_curl));
728*9d15e85bSSebastian Grimberg     CeedCallBackend(magma_malloc((void **)&impl->d_curl, num_qpts * num_nodes * q_comp_curl * sizeof(curl[0])));
729*9d15e85bSSebastian Grimberg     magma_setvector(num_qpts * num_nodes * q_comp_curl, sizeof(curl[0]), curl, 1, impl->d_curl, 1, data->queue);
730*9d15e85bSSebastian Grimberg   }
731940a72f1SSebastian Grimberg 
732940a72f1SSebastian Grimberg   // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
733940a72f1SSebastian Grimberg   CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));
734940a72f1SSebastian Grimberg 
735940a72f1SSebastian Grimberg   // Compile weight kernel (the remainder of kernel compilation happens at first call to CeedBasisApply)
736940a72f1SSebastian Grimberg   CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path));
737940a72f1SSebastian Grimberg   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
738940a72f1SSebastian Grimberg   CeedCallBackend(CeedLoadSourceToBuffer(ceed, weight_kernel_path, &basis_kernel_source));
739940a72f1SSebastian Grimberg   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
740940a72f1SSebastian Grimberg   CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module_weight, 1, "BASIS_Q", num_qpts));
741940a72f1SSebastian Grimberg   CeedCallBackend(CeedGetKernelMagma(ceed, impl->module_weight, "magma_weight_nontensor", &impl->Weight));
742940a72f1SSebastian Grimberg   CeedCallBackend(CeedFree(&weight_kernel_path));
743940a72f1SSebastian Grimberg   CeedCallBackend(CeedFree(&basis_kernel_source));
744868539c2SNatalie Beams 
745023b8a51Sabdelfattah83   CeedCallBackend(CeedBasisSetData(basis, impl));
746940a72f1SSebastian Grimberg 
747940a72f1SSebastian Grimberg   // Register backend functions
748940a72f1SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma));
749940a72f1SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma));
750e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
7517f5b9731SStan Tomov }
752940a72f1SSebastian Grimberg 
753940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
754