// xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma-basis.c (revision 465fc175a5f0c1a9f970b04216cfa606614f42fc)
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
167f5b9731SStan Tomov 
177f5b9731SStan Tomov #include "ceed-magma.h"
187f5b9731SStan Tomov 
197f5b9731SStan Tomov #ifdef __cplusplus
207f5b9731SStan Tomov CEED_INTERN "C"
217f5b9731SStan Tomov #endif
227f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
237f5b9731SStan Tomov                          CeedTransposeMode tmode, CeedEvalMode emode,
243513a710Sjeremylt                          CeedVector U, CeedVector V) {
257f5b9731SStan Tomov   int ierr;
267f5b9731SStan Tomov   Ceed ceed;
277f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
287f5b9731SStan Tomov   CeedInt dim, ncomp, ndof, nqpt;
297f5b9731SStan Tomov   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
307f5b9731SStan Tomov   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
317f5b9731SStan Tomov   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr);
327f5b9731SStan Tomov   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr);
337f5b9731SStan Tomov   const CeedScalar *u;
347f5b9731SStan Tomov   CeedScalar *v;
35868539c2SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
367f5b9731SStan Tomov     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChk(ierr);
377f5b9731SStan Tomov   } else if (emode != CEED_EVAL_WEIGHT) {
387f5b9731SStan Tomov     // LCOV_EXCL_START
397f5b9731SStan Tomov     return CeedError(ceed, 1,
407f5b9731SStan Tomov                      "An input vector is required for this CeedEvalMode");
417f5b9731SStan Tomov     // LCOV_EXCL_STOP
427f5b9731SStan Tomov   }
437f5b9731SStan Tomov   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChk(ierr);
447f5b9731SStan Tomov 
457f5b9731SStan Tomov   CeedBasis_Magma *impl;
467f5b9731SStan Tomov   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
477f5b9731SStan Tomov 
487f5b9731SStan Tomov   CeedInt P1d, Q1d;
497f5b9731SStan Tomov   ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr);
507f5b9731SStan Tomov   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr);
517f5b9731SStan Tomov 
527f5b9731SStan Tomov   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d",
537f5b9731SStan Tomov             ncomp*CeedIntPow(P1d, dim), ncomp);
547f5b9731SStan Tomov 
557f5b9731SStan Tomov   if (tmode == CEED_TRANSPOSE) {
567f5b9731SStan Tomov     CeedInt length;
57*465fc175SJeremy L Thompson     ierr = CeedVectorGetLength(V, &length); CeedChk(ierr);
587f5b9731SStan Tomov     magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length);
597f5b9731SStan Tomov   }
603513a710Sjeremylt   switch (emode) {
613513a710Sjeremylt   case CEED_EVAL_INTERP: {
627f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
637f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
647f5b9731SStan Tomov       P = Q1d; Q = P1d;
657f5b9731SStan Tomov     }
667f5b9731SStan Tomov 
677f5b9731SStan Tomov     // Define element sizes for dofs/quad
687f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
697f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
707f5b9731SStan Tomov 
717f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
72868539c2SNatalie Beams     //  component                        component
73868539c2SNatalie Beams     //    elem                             elem
747f5b9731SStan Tomov     //       node                            node
757f5b9731SStan Tomov 
767f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
777f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
787f5b9731SStan Tomov 
797f5b9731SStan Tomov     // Element strides
80868539c2SNatalie Beams     CeedInt u_elstride = eldofssize;
817f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
827f5b9731SStan Tomov     // Component strides
83868539c2SNatalie Beams     CeedInt u_compstride = nelem * eldofssize;
847f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
857f5b9731SStan Tomov 
867f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
877f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
887f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
897f5b9731SStan Tomov       // Element strides
90868539c2SNatalie Beams       v_elstride = eldofssize;
917f5b9731SStan Tomov       u_elstride = elquadsize;
927f5b9731SStan Tomov       // Component strides
93868539c2SNatalie Beams       v_compstride = nelem * eldofssize;
947f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
957f5b9731SStan Tomov     }
967f5b9731SStan Tomov 
977f5b9731SStan Tomov     // Loop through components and apply batch over elements
987f5b9731SStan Tomov     magmablas_dbasis_apply_batched_eval_interp(P, Q, dim, ncomp,
997f5b9731SStan Tomov         impl->dinterp1d, tmode,
100868539c2SNatalie Beams         u, u_elstride, u_compstride,
101868539c2SNatalie Beams         v, v_elstride, v_compstride,
1027f5b9731SStan Tomov         nelem);
1037f5b9731SStan Tomov   }
1043513a710Sjeremylt   break;
1053513a710Sjeremylt   case CEED_EVAL_GRAD: {
1067f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
1077f5b9731SStan Tomov     // In CEED_NOTRANSPOSE mode:
1087f5b9731SStan Tomov     // u is (P^dim x nc), column-major layout (nc = ncomp)
1097f5b9731SStan Tomov     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
1107f5b9731SStan Tomov     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
1117f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1127f5b9731SStan Tomov       P = Q1d, Q = P1d;
1137f5b9731SStan Tomov     }
1147f5b9731SStan Tomov 
1157f5b9731SStan Tomov     // Define element sizes for dofs/quad
1167f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
1177f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
1187f5b9731SStan Tomov 
1197f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
1207f5b9731SStan Tomov     //                                  dim
121868539c2SNatalie Beams     //  component                        component
122868539c2SNatalie Beams     //    elem                              elem
1237f5b9731SStan Tomov     //       node                            node
1247f5b9731SStan Tomov 
1257f5b9731SStan Tomov 
1267f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
1277f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
1287f5b9731SStan Tomov 
1297f5b9731SStan Tomov     // Element strides
130868539c2SNatalie Beams     CeedInt u_elstride = eldofssize;
1317f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
1327f5b9731SStan Tomov     // Component strides
133868539c2SNatalie Beams     CeedInt u_compstride = nelem * eldofssize;
1347f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
1357f5b9731SStan Tomov     // Dimension strides
1367f5b9731SStan Tomov     CeedInt u_dimstride = 0;
1377f5b9731SStan Tomov     CeedInt v_dimstride = nelem * elquadsize * ncomp;
1387f5b9731SStan Tomov 
1397f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
1407f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1417f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
1427f5b9731SStan Tomov       // Element strides
143868539c2SNatalie Beams       v_elstride = eldofssize;
1447f5b9731SStan Tomov       u_elstride = elquadsize;
1457f5b9731SStan Tomov       // Component strides
146868539c2SNatalie Beams       v_compstride = nelem * eldofssize;
1477f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
1487f5b9731SStan Tomov       // Dimension strides
1497f5b9731SStan Tomov       v_dimstride = 0;
1507f5b9731SStan Tomov       u_dimstride = nelem * elquadsize * ncomp;
1517f5b9731SStan Tomov 
1527f5b9731SStan Tomov     }
1537f5b9731SStan Tomov 
154868539c2SNatalie Beams     // Loop through grad dimensions only, batch call over elements and components
1553513a710Sjeremylt     for (CeedInt dim_ctr = 0; dim_ctr < dim; dim_ctr++)
1567f5b9731SStan Tomov       magmablas_dbasis_apply_batched_eval_grad(P, Q, dim, ncomp, nqpt,
1577f5b9731SStan Tomov           impl->dinterp1d, impl->dgrad1d, tmode,
158868539c2SNatalie Beams           u + dim_ctr * u_dimstride, u_elstride, u_compstride, u_dimstride,
159868539c2SNatalie Beams           v + dim_ctr * v_dimstride, v_elstride, v_compstride, v_dimstride,
160868539c2SNatalie Beams           dim_ctr, nelem);
1617f5b9731SStan Tomov   }
1623513a710Sjeremylt   break;
1633513a710Sjeremylt   case CEED_EVAL_WEIGHT: {
1647f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE)
1657f5b9731SStan Tomov       // LCOV_EXCL_START
1667f5b9731SStan Tomov       return CeedError(ceed, 1,
1677f5b9731SStan Tomov                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
1687f5b9731SStan Tomov     // LCOV_EXCL_STOP
1697f5b9731SStan Tomov     CeedInt Q = Q1d;
1707f5b9731SStan Tomov     int eldofssize = CeedIntPow(Q, dim);
1717f5b9731SStan Tomov     magmablas_dbasis_apply_batched_eval_weight(Q, dim, impl->dqweight1d,
1727f5b9731SStan Tomov         v, eldofssize,
1737f5b9731SStan Tomov         nelem);
1747f5b9731SStan Tomov   }
1753513a710Sjeremylt   break;
1763513a710Sjeremylt   // LCOV_EXCL_START
1773513a710Sjeremylt   case CEED_EVAL_DIV:
1783513a710Sjeremylt     return CeedError(ceed, 1, "CEED_EVAL_DIV not supported");
1793513a710Sjeremylt   case CEED_EVAL_CURL:
1803513a710Sjeremylt     return CeedError(ceed, 1, "CEED_EVAL_CURL not supported");
1813513a710Sjeremylt   case CEED_EVAL_NONE:
1823513a710Sjeremylt     return CeedError(ceed, 1,
1833513a710Sjeremylt                      "CEED_EVAL_NONE does not make sense in this context");
1843513a710Sjeremylt     // LCOV_EXCL_STOP
1853513a710Sjeremylt   }
1867f5b9731SStan Tomov 
1877f5b9731SStan Tomov   if (emode!=CEED_EVAL_WEIGHT) {
1887f5b9731SStan Tomov     ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr);
1897f5b9731SStan Tomov   }
1907f5b9731SStan Tomov   ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr);
1917f5b9731SStan Tomov   return 0;
1927f5b9731SStan Tomov }
1937f5b9731SStan Tomov 
1947f5b9731SStan Tomov #ifdef __cplusplus
1957f5b9731SStan Tomov CEED_INTERN "C"
1967f5b9731SStan Tomov #endif
197868539c2SNatalie Beams int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt nelem,
198868539c2SNatalie Beams                                   CeedTransposeMode tmode, CeedEvalMode emode,
199868539c2SNatalie Beams                                   CeedVector U, CeedVector V) {
200868539c2SNatalie Beams   int ierr;
201868539c2SNatalie Beams   Ceed ceed;
202868539c2SNatalie Beams   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
203868539c2SNatalie Beams   CeedInt dim, ncomp, ndof, nqpt;
204868539c2SNatalie Beams   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
205868539c2SNatalie Beams   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
206868539c2SNatalie Beams   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr);
207868539c2SNatalie Beams   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr);
208868539c2SNatalie Beams   const CeedScalar *du;
209868539c2SNatalie Beams   CeedScalar *dv;
210868539c2SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
211868539c2SNatalie Beams     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChk(ierr);
212868539c2SNatalie Beams   } else if (emode != CEED_EVAL_WEIGHT) {
213868539c2SNatalie Beams     // LCOV_EXCL_START
214868539c2SNatalie Beams     return CeedError(ceed, 1,
215868539c2SNatalie Beams                      "An input vector is required for this CeedEvalMode");
216868539c2SNatalie Beams     // LCOV_EXCL_STOP
217868539c2SNatalie Beams   }
218868539c2SNatalie Beams   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChk(ierr);
219868539c2SNatalie Beams 
220868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
221868539c2SNatalie Beams   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
222868539c2SNatalie Beams 
223868539c2SNatalie Beams   CeedDebug("\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d",
224868539c2SNatalie Beams             ncomp*ndof, ncomp);
225868539c2SNatalie Beams 
226868539c2SNatalie Beams   if (tmode == CEED_TRANSPOSE) {
227868539c2SNatalie Beams     CeedInt length;
228868539c2SNatalie Beams     ierr = CeedVectorGetLength(V, &length);
229868539c2SNatalie Beams     magmablas_dlaset(MagmaFull, length, 1, 0., 0., dv, length);
230868539c2SNatalie Beams   }
231868539c2SNatalie Beams   switch (emode) {
232868539c2SNatalie Beams   case CEED_EVAL_INTERP: {
233868539c2SNatalie Beams     CeedInt P = ndof, Q = nqpt;
234868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE)
235868539c2SNatalie Beams       magma_dgemm(MagmaNoTrans, MagmaNoTrans,
236868539c2SNatalie Beams                   P, nelem*ncomp, Q,
237868539c2SNatalie Beams                   1.0, impl->dinterp, P,
238868539c2SNatalie Beams                   du, Q,
239868539c2SNatalie Beams                   0.0, dv, P);
240868539c2SNatalie Beams     else
241868539c2SNatalie Beams       magma_dgemm(MagmaTrans, MagmaNoTrans,
242868539c2SNatalie Beams                   Q, nelem*ncomp, P,
243868539c2SNatalie Beams                   1.0, impl->dinterp, P,
244868539c2SNatalie Beams                   du, P,
245868539c2SNatalie Beams                   0.0, dv, Q);
246868539c2SNatalie Beams   }
247868539c2SNatalie Beams   break;
248868539c2SNatalie Beams 
249868539c2SNatalie Beams   case CEED_EVAL_GRAD: {
250868539c2SNatalie Beams     CeedInt P = ndof, Q = nqpt;
251868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE) {
252868539c2SNatalie Beams       double beta = 0.0;
253868539c2SNatalie Beams       for(int d=0; d<dim; d++) {
254868539c2SNatalie Beams         if (d>0)
255868539c2SNatalie Beams           beta = 1.0;
256868539c2SNatalie Beams         magma_dgemm(MagmaNoTrans, MagmaNoTrans,
257868539c2SNatalie Beams                     P, nelem*ncomp, Q,
258868539c2SNatalie Beams                     1.0, impl->dgrad + d*P*Q, P,
259868539c2SNatalie Beams                     du + d*nelem*ncomp*Q, Q,
260868539c2SNatalie Beams                     beta, dv, P);
261868539c2SNatalie Beams       }
262868539c2SNatalie Beams     } else {
263868539c2SNatalie Beams       for(int d=0; d< dim; d++)
264868539c2SNatalie Beams         magma_dgemm(MagmaTrans, MagmaNoTrans,
265868539c2SNatalie Beams                     Q, nelem*ncomp, P,
266868539c2SNatalie Beams                     1.0, impl->dgrad + d*P*Q, P,
267868539c2SNatalie Beams                     du, P,
268868539c2SNatalie Beams                     0.0, dv + d*nelem*ncomp*Q, Q);
269868539c2SNatalie Beams     }
270868539c2SNatalie Beams   }
271868539c2SNatalie Beams   break;
272868539c2SNatalie Beams 
273868539c2SNatalie Beams   case CEED_EVAL_WEIGHT: {
274868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE)
275868539c2SNatalie Beams       // LCOV_EXCL_START
276868539c2SNatalie Beams       return CeedError(ceed, 1,
277868539c2SNatalie Beams                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
278868539c2SNatalie Beams     // LCOV_EXCL_STOP
279868539c2SNatalie Beams 
280868539c2SNatalie Beams     int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1;
281868539c2SNatalie Beams     int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)?
282868539c2SNatalie Beams                                        1 : 0 );
283868539c2SNatalie Beams     magma_weight(grid, nqpt, nelem, nqpt, impl->dqweight, dv);
284868539c2SNatalie Beams     CeedChk(ierr);
285868539c2SNatalie Beams   }
286868539c2SNatalie Beams   break;
287868539c2SNatalie Beams 
288868539c2SNatalie Beams   // LCOV_EXCL_START
289868539c2SNatalie Beams   case CEED_EVAL_DIV:
290868539c2SNatalie Beams     return CeedError(ceed, 1, "CEED_EVAL_DIV not supported");
291868539c2SNatalie Beams   case CEED_EVAL_CURL:
292868539c2SNatalie Beams     return CeedError(ceed, 1, "CEED_EVAL_CURL not supported");
293868539c2SNatalie Beams   case CEED_EVAL_NONE:
294868539c2SNatalie Beams     return CeedError(ceed, 1,
295868539c2SNatalie Beams                      "CEED_EVAL_NONE does not make sense in this context");
296868539c2SNatalie Beams     // LCOV_EXCL_STOP
297868539c2SNatalie Beams   }
298868539c2SNatalie Beams 
299868539c2SNatalie Beams   if(emode!=CEED_EVAL_WEIGHT) {
300868539c2SNatalie Beams     ierr = CeedVectorRestoreArrayRead(U, &du); CeedChk(ierr);
301868539c2SNatalie Beams   }
302868539c2SNatalie Beams   ierr = CeedVectorRestoreArray(V, &dv); CeedChk(ierr);
303868539c2SNatalie Beams   return 0;
304868539c2SNatalie Beams }
305868539c2SNatalie Beams 
306868539c2SNatalie Beams #ifdef __cplusplus
307868539c2SNatalie Beams CEED_INTERN "C"
308868539c2SNatalie Beams #endif
3093513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) {
3107f5b9731SStan Tomov   int ierr;
3117f5b9731SStan Tomov   CeedBasis_Magma *impl;
3127f5b9731SStan Tomov   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
3137f5b9731SStan Tomov 
3147f5b9731SStan Tomov   ierr = magma_free(impl->dqref1d); CeedChk(ierr);
3157f5b9731SStan Tomov   ierr = magma_free(impl->dinterp1d); CeedChk(ierr);
3167f5b9731SStan Tomov   ierr = magma_free(impl->dgrad1d); CeedChk(ierr);
3177f5b9731SStan Tomov   ierr = magma_free(impl->dqweight1d); CeedChk(ierr);
3187f5b9731SStan Tomov 
3197f5b9731SStan Tomov   ierr = CeedFree(&impl); CeedChk(ierr);
3207f5b9731SStan Tomov 
3217f5b9731SStan Tomov   return 0;
3227f5b9731SStan Tomov }
3237f5b9731SStan Tomov 
3247f5b9731SStan Tomov #ifdef __cplusplus
3257f5b9731SStan Tomov CEED_INTERN "C"
3267f5b9731SStan Tomov #endif
327868539c2SNatalie Beams int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) {
328868539c2SNatalie Beams   int ierr;
329868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
330868539c2SNatalie Beams   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
331868539c2SNatalie Beams 
332868539c2SNatalie Beams   ierr = magma_free(impl->dqref); CeedChk(ierr);
333868539c2SNatalie Beams   ierr = magma_free(impl->dinterp); CeedChk(ierr);
334868539c2SNatalie Beams   ierr = magma_free(impl->dgrad); CeedChk(ierr);
335868539c2SNatalie Beams   ierr = magma_free(impl->dqweight); CeedChk(ierr);
336868539c2SNatalie Beams 
337868539c2SNatalie Beams   ierr = CeedFree(&impl); CeedChk(ierr);
338868539c2SNatalie Beams 
339868539c2SNatalie Beams   return 0;
340868539c2SNatalie Beams }
341868539c2SNatalie Beams 
342868539c2SNatalie Beams #ifdef __cplusplus
343868539c2SNatalie Beams CEED_INTERN "C"
344868539c2SNatalie Beams #endif
3453513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d,
3463513a710Sjeremylt                                   const CeedScalar *interp1d,
3477f5b9731SStan Tomov                                   const CeedScalar *grad1d,
3487f5b9731SStan Tomov                                   const CeedScalar *qref1d,
3493513a710Sjeremylt                                   const CeedScalar *qweight1d, CeedBasis basis) {
3507f5b9731SStan Tomov   int ierr;
3517f5b9731SStan Tomov   CeedBasis_Magma *impl;
3527f5b9731SStan Tomov   Ceed ceed;
3537f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
3547f5b9731SStan Tomov 
3557f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
3567f5b9731SStan Tomov                                 CeedBasisApply_Magma); CeedChk(ierr);
3577f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
3587f5b9731SStan Tomov                                 CeedBasisDestroy_Magma); CeedChk(ierr);
3597f5b9731SStan Tomov 
3607f5b9731SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
3617f5b9731SStan Tomov   ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr);
3627f5b9731SStan Tomov 
3637f5b9731SStan Tomov   // Copy qref1d to the GPU
3647f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0]));
3657f5b9731SStan Tomov   CeedChk(ierr);
3667f5b9731SStan Tomov   magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1);
3677f5b9731SStan Tomov 
3687f5b9731SStan Tomov   // Copy interp1d to the GPU
3697f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0]));
3707f5b9731SStan Tomov   CeedChk(ierr);
3717f5b9731SStan Tomov   magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1);
3727f5b9731SStan Tomov 
3737f5b9731SStan Tomov   // Copy grad1d to the GPU
3747f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0]));
3757f5b9731SStan Tomov   CeedChk(ierr);
3767f5b9731SStan Tomov   magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1);
3777f5b9731SStan Tomov 
3787f5b9731SStan Tomov   // Copy qweight1d to the GPU
3797f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0]));
3807f5b9731SStan Tomov   CeedChk(ierr);
3817f5b9731SStan Tomov   magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1);
3827f5b9731SStan Tomov 
3837f5b9731SStan Tomov   return 0;
3847f5b9731SStan Tomov }
3857f5b9731SStan Tomov 
3867f5b9731SStan Tomov #ifdef __cplusplus
3877f5b9731SStan Tomov CEED_INTERN "C"
3887f5b9731SStan Tomov #endif
3893513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof,
3903513a710Sjeremylt                             CeedInt nqpts, const CeedScalar *interp,
3913513a710Sjeremylt                             const CeedScalar *grad, const CeedScalar *qref,
3923513a710Sjeremylt                             const CeedScalar *qweight, CeedBasis basis) {
3937f5b9731SStan Tomov   int ierr;
394868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
3957f5b9731SStan Tomov   Ceed ceed;
3967f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
3977f5b9731SStan Tomov 
398868539c2SNatalie Beams   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
399868539c2SNatalie Beams                                 CeedBasisApplyNonTensor_Magma); CeedChk(ierr);
400868539c2SNatalie Beams   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
401868539c2SNatalie Beams                                 CeedBasisDestroyNonTensor_Magma); CeedChk(ierr);
402868539c2SNatalie Beams 
403868539c2SNatalie Beams   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
404868539c2SNatalie Beams   ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr);
405868539c2SNatalie Beams 
406868539c2SNatalie Beams   // Copy qref to the GPU
407868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dqref, nqpts*sizeof(qref[0]));
408868539c2SNatalie Beams   CeedChk(ierr);
409868539c2SNatalie Beams   magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1);
410868539c2SNatalie Beams 
411868539c2SNatalie Beams   // Copy interp to the GPU
412868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0]));
413868539c2SNatalie Beams   CeedChk(ierr);
414868539c2SNatalie Beams   magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1);
415868539c2SNatalie Beams 
416868539c2SNatalie Beams   // Copy grad to the GPU
417868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0]));
418868539c2SNatalie Beams   CeedChk(ierr);
419868539c2SNatalie Beams   magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), grad, 1, impl->dgrad, 1);
420868539c2SNatalie Beams 
421868539c2SNatalie Beams   // Copy qweight to the GPU
422868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0]));
423868539c2SNatalie Beams   CeedChk(ierr);
424868539c2SNatalie Beams   magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1);
425868539c2SNatalie Beams 
426868539c2SNatalie Beams   return 0;
4277f5b9731SStan Tomov }
428