xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma-basis.c (revision 868539c291cd6e4adc5c1e2f0ea123f6c9e198f6)
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
167f5b9731SStan Tomov 
177f5b9731SStan Tomov #include "ceed-magma.h"
187f5b9731SStan Tomov 
197f5b9731SStan Tomov #ifdef __cplusplus
207f5b9731SStan Tomov CEED_INTERN "C"
217f5b9731SStan Tomov #endif
227f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
237f5b9731SStan Tomov                          CeedTransposeMode tmode, CeedEvalMode emode,
243513a710Sjeremylt                          CeedVector U, CeedVector V) {
257f5b9731SStan Tomov   int ierr;
267f5b9731SStan Tomov   Ceed ceed;
277f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
287f5b9731SStan Tomov   CeedInt dim, ncomp, ndof, nqpt;
297f5b9731SStan Tomov   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
307f5b9731SStan Tomov   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
317f5b9731SStan Tomov   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr);
327f5b9731SStan Tomov   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr);
337f5b9731SStan Tomov   const CeedScalar *u;
347f5b9731SStan Tomov   CeedScalar *v;
35*868539c2SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
367f5b9731SStan Tomov     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChk(ierr);
377f5b9731SStan Tomov   } else if (emode != CEED_EVAL_WEIGHT) {
387f5b9731SStan Tomov     // LCOV_EXCL_START
397f5b9731SStan Tomov     return CeedError(ceed, 1,
407f5b9731SStan Tomov                      "An input vector is required for this CeedEvalMode");
417f5b9731SStan Tomov     // LCOV_EXCL_STOP
427f5b9731SStan Tomov   }
437f5b9731SStan Tomov   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChk(ierr);
447f5b9731SStan Tomov 
457f5b9731SStan Tomov   CeedBasis_Magma *impl;
467f5b9731SStan Tomov   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
477f5b9731SStan Tomov 
487f5b9731SStan Tomov   CeedInt P1d, Q1d;
497f5b9731SStan Tomov   ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr);
507f5b9731SStan Tomov   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr);
517f5b9731SStan Tomov 
527f5b9731SStan Tomov   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d",
537f5b9731SStan Tomov             ncomp*CeedIntPow(P1d, dim), ncomp);
547f5b9731SStan Tomov 
557f5b9731SStan Tomov   if (tmode == CEED_TRANSPOSE) {
567f5b9731SStan Tomov     CeedInt length;
577f5b9731SStan Tomov     ierr = CeedVectorGetLength(V, &length);
587f5b9731SStan Tomov     magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length);
597f5b9731SStan Tomov   }
603513a710Sjeremylt   switch (emode) {
613513a710Sjeremylt   case CEED_EVAL_INTERP: {
627f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
637f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
647f5b9731SStan Tomov       P = Q1d; Q = P1d;
657f5b9731SStan Tomov     }
667f5b9731SStan Tomov 
677f5b9731SStan Tomov     // Define element sizes for dofs/quad
687f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
697f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
707f5b9731SStan Tomov 
717f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
72*868539c2SNatalie Beams     //  component                        component
73*868539c2SNatalie Beams     //    elem                             elem
747f5b9731SStan Tomov     //       node                            node
757f5b9731SStan Tomov 
767f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
777f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
787f5b9731SStan Tomov 
797f5b9731SStan Tomov     // Element strides
80*868539c2SNatalie Beams     CeedInt u_elstride = eldofssize;
817f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
827f5b9731SStan Tomov     // Component strides
83*868539c2SNatalie Beams     CeedInt u_compstride = nelem * eldofssize;
847f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
857f5b9731SStan Tomov 
867f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
877f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
887f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
897f5b9731SStan Tomov       // Element strides
90*868539c2SNatalie Beams       v_elstride = eldofssize;
917f5b9731SStan Tomov       u_elstride = elquadsize;
927f5b9731SStan Tomov       // Component strides
93*868539c2SNatalie Beams       v_compstride = nelem * eldofssize;
947f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
957f5b9731SStan Tomov     }
967f5b9731SStan Tomov 
977f5b9731SStan Tomov     // Loop through components and apply batch over elements
987f5b9731SStan Tomov     magmablas_dbasis_apply_batched_eval_interp(P, Q, dim, ncomp,
997f5b9731SStan Tomov         impl->dinterp1d, tmode,
100*868539c2SNatalie Beams         u, u_elstride, u_compstride,
101*868539c2SNatalie Beams         v, v_elstride, v_compstride,
1027f5b9731SStan Tomov         nelem);
1037f5b9731SStan Tomov   }
1043513a710Sjeremylt   break;
1053513a710Sjeremylt   case CEED_EVAL_GRAD: {
1067f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
1077f5b9731SStan Tomov     // In CEED_NOTRANSPOSE mode:
1087f5b9731SStan Tomov     // u is (P^dim x nc), column-major layout (nc = ncomp)
1097f5b9731SStan Tomov     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
1107f5b9731SStan Tomov     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
1117f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1127f5b9731SStan Tomov       P = Q1d, Q = P1d;
1137f5b9731SStan Tomov     }
1147f5b9731SStan Tomov 
1157f5b9731SStan Tomov     // Define element sizes for dofs/quad
1167f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
1177f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
1187f5b9731SStan Tomov 
1197f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
1207f5b9731SStan Tomov     //                                  dim
121*868539c2SNatalie Beams     //  component                        component
122*868539c2SNatalie Beams     //    elem                              elem
1237f5b9731SStan Tomov     //       node                            node
1247f5b9731SStan Tomov 
1257f5b9731SStan Tomov 
1267f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
1277f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
1287f5b9731SStan Tomov 
1297f5b9731SStan Tomov     // Element strides
130*868539c2SNatalie Beams     CeedInt u_elstride = eldofssize;
1317f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
1327f5b9731SStan Tomov     // Component strides
133*868539c2SNatalie Beams     CeedInt u_compstride = nelem * eldofssize;
1347f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
1357f5b9731SStan Tomov     // Dimension strides
1367f5b9731SStan Tomov     CeedInt u_dimstride = 0;
1377f5b9731SStan Tomov     CeedInt v_dimstride = nelem * elquadsize * ncomp;
1387f5b9731SStan Tomov 
1397f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
1407f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1417f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
1427f5b9731SStan Tomov       // Element strides
143*868539c2SNatalie Beams       v_elstride = eldofssize;
1447f5b9731SStan Tomov       u_elstride = elquadsize;
1457f5b9731SStan Tomov       // Component strides
146*868539c2SNatalie Beams       v_compstride = nelem * eldofssize;
1477f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
1487f5b9731SStan Tomov       // Dimension strides
1497f5b9731SStan Tomov       v_dimstride = 0;
1507f5b9731SStan Tomov       u_dimstride = nelem * elquadsize * ncomp;
1517f5b9731SStan Tomov 
1527f5b9731SStan Tomov     }
1537f5b9731SStan Tomov 
154*868539c2SNatalie Beams     // Loop through grad dimensions only, batch call over elements and components
1553513a710Sjeremylt     for (CeedInt dim_ctr = 0; dim_ctr < dim; dim_ctr++)
1567f5b9731SStan Tomov       magmablas_dbasis_apply_batched_eval_grad(P, Q, dim, ncomp, nqpt,
1577f5b9731SStan Tomov           impl->dinterp1d, impl->dgrad1d, tmode,
158*868539c2SNatalie Beams           u + dim_ctr * u_dimstride, u_elstride, u_compstride, u_dimstride,
159*868539c2SNatalie Beams           v + dim_ctr * v_dimstride, v_elstride, v_compstride, v_dimstride,
160*868539c2SNatalie Beams           dim_ctr, nelem);
1617f5b9731SStan Tomov   }
1623513a710Sjeremylt   break;
1633513a710Sjeremylt   case CEED_EVAL_WEIGHT: {
1647f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE)
1657f5b9731SStan Tomov       // LCOV_EXCL_START
1667f5b9731SStan Tomov       return CeedError(ceed, 1,
1677f5b9731SStan Tomov                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
1687f5b9731SStan Tomov     // LCOV_EXCL_STOP
1697f5b9731SStan Tomov     CeedInt Q = Q1d;
1707f5b9731SStan Tomov     int eldofssize = CeedIntPow(Q, dim);
1717f5b9731SStan Tomov     magmablas_dbasis_apply_batched_eval_weight(Q, dim, impl->dqweight1d,
1727f5b9731SStan Tomov         v, eldofssize,
1737f5b9731SStan Tomov         nelem);
1747f5b9731SStan Tomov   }
1753513a710Sjeremylt   break;
1763513a710Sjeremylt   // LCOV_EXCL_START
1773513a710Sjeremylt   case CEED_EVAL_DIV:
1783513a710Sjeremylt     return CeedError(ceed, 1, "CEED_EVAL_DIV not supported");
1793513a710Sjeremylt   case CEED_EVAL_CURL:
1803513a710Sjeremylt     return CeedError(ceed, 1, "CEED_EVAL_CURL not supported");
1813513a710Sjeremylt   case CEED_EVAL_NONE:
1823513a710Sjeremylt     return CeedError(ceed, 1,
1833513a710Sjeremylt                      "CEED_EVAL_NONE does not make sense in this context");
1843513a710Sjeremylt     // LCOV_EXCL_STOP
1853513a710Sjeremylt   }
1867f5b9731SStan Tomov 
1877f5b9731SStan Tomov   if (emode!=CEED_EVAL_WEIGHT) {
1887f5b9731SStan Tomov     ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr);
1897f5b9731SStan Tomov   }
1907f5b9731SStan Tomov   ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr);
1917f5b9731SStan Tomov   return 0;
1927f5b9731SStan Tomov }
1937f5b9731SStan Tomov 
1947f5b9731SStan Tomov #ifdef __cplusplus
1957f5b9731SStan Tomov CEED_INTERN "C"
1967f5b9731SStan Tomov #endif
197*868539c2SNatalie Beams int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt nelem,
198*868539c2SNatalie Beams                                   CeedTransposeMode tmode, CeedEvalMode emode,
199*868539c2SNatalie Beams                                   CeedVector U, CeedVector V) {
200*868539c2SNatalie Beams   int ierr;
201*868539c2SNatalie Beams   Ceed ceed;
202*868539c2SNatalie Beams   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
203*868539c2SNatalie Beams   CeedInt dim, ncomp, ndof, nqpt;
204*868539c2SNatalie Beams   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
205*868539c2SNatalie Beams   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
206*868539c2SNatalie Beams   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr);
207*868539c2SNatalie Beams   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr);
208*868539c2SNatalie Beams   const CeedScalar *du;
209*868539c2SNatalie Beams   CeedScalar *dv;
210*868539c2SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
211*868539c2SNatalie Beams     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChk(ierr);
212*868539c2SNatalie Beams   } else if (emode != CEED_EVAL_WEIGHT) {
213*868539c2SNatalie Beams     // LCOV_EXCL_START
214*868539c2SNatalie Beams     return CeedError(ceed, 1,
215*868539c2SNatalie Beams                      "An input vector is required for this CeedEvalMode");
216*868539c2SNatalie Beams     // LCOV_EXCL_STOP
217*868539c2SNatalie Beams   }
218*868539c2SNatalie Beams   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChk(ierr);
219*868539c2SNatalie Beams 
220*868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
221*868539c2SNatalie Beams   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
222*868539c2SNatalie Beams 
223*868539c2SNatalie Beams   CeedDebug("\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d",
224*868539c2SNatalie Beams             ncomp*ndof, ncomp);
225*868539c2SNatalie Beams 
226*868539c2SNatalie Beams   if (tmode == CEED_TRANSPOSE) {
227*868539c2SNatalie Beams     CeedInt length;
228*868539c2SNatalie Beams     ierr = CeedVectorGetLength(V, &length);
229*868539c2SNatalie Beams     magmablas_dlaset(MagmaFull, length, 1, 0., 0., dv, length);
230*868539c2SNatalie Beams   }
231*868539c2SNatalie Beams   switch (emode) {
232*868539c2SNatalie Beams   case CEED_EVAL_INTERP: {
233*868539c2SNatalie Beams     CeedInt P = ndof, Q = nqpt;
234*868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE)
235*868539c2SNatalie Beams       magma_dgemm(MagmaNoTrans, MagmaNoTrans,
236*868539c2SNatalie Beams                   P, nelem*ncomp, Q,
237*868539c2SNatalie Beams                   1.0, impl->dinterp, P,
238*868539c2SNatalie Beams                   du, Q,
239*868539c2SNatalie Beams                   0.0, dv, P);
240*868539c2SNatalie Beams     else
241*868539c2SNatalie Beams       magma_dgemm(MagmaTrans, MagmaNoTrans,
242*868539c2SNatalie Beams                   Q, nelem*ncomp, P,
243*868539c2SNatalie Beams                   1.0, impl->dinterp, P,
244*868539c2SNatalie Beams                   du, P,
245*868539c2SNatalie Beams                   0.0, dv, Q);
246*868539c2SNatalie Beams   }
247*868539c2SNatalie Beams   break;
248*868539c2SNatalie Beams 
249*868539c2SNatalie Beams   case CEED_EVAL_GRAD: {
250*868539c2SNatalie Beams     CeedInt P = ndof, Q = nqpt;
251*868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE) {
252*868539c2SNatalie Beams       double beta = 0.0;
253*868539c2SNatalie Beams       for(int d=0; d<dim; d++) {
254*868539c2SNatalie Beams         if (d>0)
255*868539c2SNatalie Beams           beta = 1.0;
256*868539c2SNatalie Beams         magma_dgemm(MagmaNoTrans, MagmaNoTrans,
257*868539c2SNatalie Beams                     P, nelem*ncomp, Q,
258*868539c2SNatalie Beams                     1.0, impl->dgrad + d*P*Q, P,
259*868539c2SNatalie Beams                     du + d*nelem*ncomp*Q, Q,
260*868539c2SNatalie Beams                     beta, dv, P);
261*868539c2SNatalie Beams       }
262*868539c2SNatalie Beams     } else {
263*868539c2SNatalie Beams       for(int d=0; d< dim; d++)
264*868539c2SNatalie Beams         magma_dgemm(MagmaTrans, MagmaNoTrans,
265*868539c2SNatalie Beams                     Q, nelem*ncomp, P,
266*868539c2SNatalie Beams                     1.0, impl->dgrad + d*P*Q, P,
267*868539c2SNatalie Beams                     du, P,
268*868539c2SNatalie Beams                     0.0, dv + d*nelem*ncomp*Q, Q);
269*868539c2SNatalie Beams     }
270*868539c2SNatalie Beams   }
271*868539c2SNatalie Beams   break;
272*868539c2SNatalie Beams 
273*868539c2SNatalie Beams   case CEED_EVAL_WEIGHT: {
274*868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE)
275*868539c2SNatalie Beams       // LCOV_EXCL_START
276*868539c2SNatalie Beams       return CeedError(ceed, 1,
277*868539c2SNatalie Beams                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
278*868539c2SNatalie Beams     // LCOV_EXCL_STOP
279*868539c2SNatalie Beams 
280*868539c2SNatalie Beams     int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1;
281*868539c2SNatalie Beams     int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)?
282*868539c2SNatalie Beams                                        1 : 0 );
283*868539c2SNatalie Beams     magma_weight(grid, nqpt, nelem, nqpt, impl->dqweight, dv);
284*868539c2SNatalie Beams     CeedChk(ierr);
285*868539c2SNatalie Beams   }
286*868539c2SNatalie Beams   break;
287*868539c2SNatalie Beams 
288*868539c2SNatalie Beams   // LCOV_EXCL_START
289*868539c2SNatalie Beams   case CEED_EVAL_DIV:
290*868539c2SNatalie Beams     return CeedError(ceed, 1, "CEED_EVAL_DIV not supported");
291*868539c2SNatalie Beams   case CEED_EVAL_CURL:
292*868539c2SNatalie Beams     return CeedError(ceed, 1, "CEED_EVAL_CURL not supported");
293*868539c2SNatalie Beams   case CEED_EVAL_NONE:
294*868539c2SNatalie Beams     return CeedError(ceed, 1,
295*868539c2SNatalie Beams                      "CEED_EVAL_NONE does not make sense in this context");
296*868539c2SNatalie Beams     // LCOV_EXCL_STOP
297*868539c2SNatalie Beams   }
298*868539c2SNatalie Beams 
299*868539c2SNatalie Beams   if(emode!=CEED_EVAL_WEIGHT) {
300*868539c2SNatalie Beams     ierr = CeedVectorRestoreArrayRead(U, &du); CeedChk(ierr);
301*868539c2SNatalie Beams   }
302*868539c2SNatalie Beams   ierr = CeedVectorRestoreArray(V, &dv); CeedChk(ierr);
303*868539c2SNatalie Beams   return 0;
304*868539c2SNatalie Beams }
305*868539c2SNatalie Beams 
306*868539c2SNatalie Beams #ifdef __cplusplus
307*868539c2SNatalie Beams CEED_INTERN "C"
308*868539c2SNatalie Beams #endif
3093513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) {
3107f5b9731SStan Tomov   int ierr;
3117f5b9731SStan Tomov   CeedBasis_Magma *impl;
3127f5b9731SStan Tomov   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
3137f5b9731SStan Tomov 
3147f5b9731SStan Tomov   ierr = magma_free(impl->dqref1d); CeedChk(ierr);
3157f5b9731SStan Tomov   ierr = magma_free(impl->dinterp1d); CeedChk(ierr);
3167f5b9731SStan Tomov   ierr = magma_free(impl->dgrad1d); CeedChk(ierr);
3177f5b9731SStan Tomov   ierr = magma_free(impl->dqweight1d); CeedChk(ierr);
3187f5b9731SStan Tomov 
3197f5b9731SStan Tomov   ierr = CeedFree(&impl); CeedChk(ierr);
3207f5b9731SStan Tomov 
3217f5b9731SStan Tomov   return 0;
3227f5b9731SStan Tomov }
3237f5b9731SStan Tomov 
3247f5b9731SStan Tomov #ifdef __cplusplus
3257f5b9731SStan Tomov CEED_INTERN "C"
3267f5b9731SStan Tomov #endif
327*868539c2SNatalie Beams int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) {
328*868539c2SNatalie Beams   int ierr;
329*868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
330*868539c2SNatalie Beams   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
331*868539c2SNatalie Beams 
332*868539c2SNatalie Beams   ierr = magma_free(impl->dqref); CeedChk(ierr);
333*868539c2SNatalie Beams   ierr = magma_free(impl->dinterp); CeedChk(ierr);
334*868539c2SNatalie Beams   ierr = magma_free(impl->dgrad); CeedChk(ierr);
335*868539c2SNatalie Beams   ierr = magma_free(impl->dqweight); CeedChk(ierr);
336*868539c2SNatalie Beams 
337*868539c2SNatalie Beams   ierr = CeedFree(&impl); CeedChk(ierr);
338*868539c2SNatalie Beams 
339*868539c2SNatalie Beams   return 0;
340*868539c2SNatalie Beams }
341*868539c2SNatalie Beams 
342*868539c2SNatalie Beams #ifdef __cplusplus
343*868539c2SNatalie Beams CEED_INTERN "C"
344*868539c2SNatalie Beams #endif
3453513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d,
3463513a710Sjeremylt                                   const CeedScalar *interp1d,
3477f5b9731SStan Tomov                                   const CeedScalar *grad1d,
3487f5b9731SStan Tomov                                   const CeedScalar *qref1d,
3493513a710Sjeremylt                                   const CeedScalar *qweight1d, CeedBasis basis) {
3507f5b9731SStan Tomov   int ierr;
3517f5b9731SStan Tomov   CeedBasis_Magma *impl;
3527f5b9731SStan Tomov   Ceed ceed;
3537f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
3547f5b9731SStan Tomov 
3557f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
3567f5b9731SStan Tomov                                 CeedBasisApply_Magma); CeedChk(ierr);
3577f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
3587f5b9731SStan Tomov                                 CeedBasisDestroy_Magma); CeedChk(ierr);
3597f5b9731SStan Tomov 
3607f5b9731SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
3617f5b9731SStan Tomov   ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr);
3627f5b9731SStan Tomov 
3637f5b9731SStan Tomov   // Copy qref1d to the GPU
3647f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0]));
3657f5b9731SStan Tomov   CeedChk(ierr);
3667f5b9731SStan Tomov   magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1);
3677f5b9731SStan Tomov 
3687f5b9731SStan Tomov   // Copy interp1d to the GPU
3697f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0]));
3707f5b9731SStan Tomov   CeedChk(ierr);
3717f5b9731SStan Tomov   magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1);
3727f5b9731SStan Tomov 
3737f5b9731SStan Tomov   // Copy grad1d to the GPU
3747f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0]));
3757f5b9731SStan Tomov   CeedChk(ierr);
3767f5b9731SStan Tomov   magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1);
3777f5b9731SStan Tomov 
3787f5b9731SStan Tomov   // Copy qweight1d to the GPU
3797f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0]));
3807f5b9731SStan Tomov   CeedChk(ierr);
3817f5b9731SStan Tomov   magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1);
3827f5b9731SStan Tomov 
3837f5b9731SStan Tomov   return 0;
3847f5b9731SStan Tomov }
3857f5b9731SStan Tomov 
3867f5b9731SStan Tomov #ifdef __cplusplus
3877f5b9731SStan Tomov CEED_INTERN "C"
3887f5b9731SStan Tomov #endif
3893513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof,
3903513a710Sjeremylt                             CeedInt nqpts, const CeedScalar *interp,
3913513a710Sjeremylt                             const CeedScalar *grad, const CeedScalar *qref,
3923513a710Sjeremylt                             const CeedScalar *qweight, CeedBasis basis) {
3937f5b9731SStan Tomov   int ierr;
394*868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
3957f5b9731SStan Tomov   Ceed ceed;
3967f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
3977f5b9731SStan Tomov 
398*868539c2SNatalie Beams   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
399*868539c2SNatalie Beams                                 CeedBasisApplyNonTensor_Magma); CeedChk(ierr);
400*868539c2SNatalie Beams   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
401*868539c2SNatalie Beams                                 CeedBasisDestroyNonTensor_Magma); CeedChk(ierr);
402*868539c2SNatalie Beams 
403*868539c2SNatalie Beams   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
404*868539c2SNatalie Beams   ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr);
405*868539c2SNatalie Beams 
406*868539c2SNatalie Beams   // Copy qref to the GPU
407*868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dqref, nqpts*sizeof(qref[0]));
408*868539c2SNatalie Beams   CeedChk(ierr);
409*868539c2SNatalie Beams   magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1);
410*868539c2SNatalie Beams 
411*868539c2SNatalie Beams   // Copy interp to the GPU
412*868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0]));
413*868539c2SNatalie Beams   CeedChk(ierr);
414*868539c2SNatalie Beams   magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1);
415*868539c2SNatalie Beams 
416*868539c2SNatalie Beams   // Copy grad to the GPU
417*868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0]));
418*868539c2SNatalie Beams   CeedChk(ierr);
419*868539c2SNatalie Beams   magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), grad, 1, impl->dgrad, 1);
420*868539c2SNatalie Beams 
421*868539c2SNatalie Beams   // Copy qweight to the GPU
422*868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0]));
423*868539c2SNatalie Beams   CeedChk(ierr);
424*868539c2SNatalie Beams   magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1);
425*868539c2SNatalie Beams 
426*868539c2SNatalie Beams   return 0;
4277f5b9731SStan Tomov }
428