xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma-basis.c (revision 777ff853944a0dbc06f7f09486fdf4674828e728)
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
167f5b9731SStan Tomov 
177f5b9731SStan Tomov #include "ceed-magma.h"
187f5b9731SStan Tomov 
197f5b9731SStan Tomov #ifdef __cplusplus
207f5b9731SStan Tomov CEED_INTERN "C"
217f5b9731SStan Tomov #endif
227f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
237f5b9731SStan Tomov                          CeedTransposeMode tmode, CeedEvalMode emode,
243513a710Sjeremylt                          CeedVector U, CeedVector V) {
257f5b9731SStan Tomov   int ierr;
267f5b9731SStan Tomov   Ceed ceed;
277f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
28e0582403Sabdelfattah83   CeedInt dim, ncomp, ndof;
297f5b9731SStan Tomov   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
307f5b9731SStan Tomov   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
317f5b9731SStan Tomov   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr);
32e0582403Sabdelfattah83 
33e0582403Sabdelfattah83   Ceed_Magma *data;
34*777ff853SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChk(ierr);
35e0582403Sabdelfattah83 
367f5b9731SStan Tomov   const CeedScalar *u;
377f5b9731SStan Tomov   CeedScalar *v;
38868539c2SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
397f5b9731SStan Tomov     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChk(ierr);
407f5b9731SStan Tomov   } else if (emode != CEED_EVAL_WEIGHT) {
417f5b9731SStan Tomov     // LCOV_EXCL_START
427f5b9731SStan Tomov     return CeedError(ceed, 1,
437f5b9731SStan Tomov                      "An input vector is required for this CeedEvalMode");
447f5b9731SStan Tomov     // LCOV_EXCL_STOP
457f5b9731SStan Tomov   }
467f5b9731SStan Tomov   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChk(ierr);
477f5b9731SStan Tomov 
487f5b9731SStan Tomov   CeedBasis_Magma *impl;
49*777ff853SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChk(ierr);
507f5b9731SStan Tomov 
517f5b9731SStan Tomov   CeedInt P1d, Q1d;
527f5b9731SStan Tomov   ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr);
537f5b9731SStan Tomov   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr);
547f5b9731SStan Tomov 
557f5b9731SStan Tomov   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d",
567f5b9731SStan Tomov             ncomp*CeedIntPow(P1d, dim), ncomp);
577f5b9731SStan Tomov 
587f5b9731SStan Tomov   if (tmode == CEED_TRANSPOSE) {
597f5b9731SStan Tomov     CeedInt length;
60465fc175SJeremy L Thompson     ierr = CeedVectorGetLength(V, &length); CeedChk(ierr);
61e0582403Sabdelfattah83     magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length, data->queue);
62e0582403Sabdelfattah83     ceed_magma_queue_sync( data->queue );
637f5b9731SStan Tomov   }
643513a710Sjeremylt   switch (emode) {
653513a710Sjeremylt   case CEED_EVAL_INTERP: {
667f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
677f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
687f5b9731SStan Tomov       P = Q1d; Q = P1d;
697f5b9731SStan Tomov     }
707f5b9731SStan Tomov 
717f5b9731SStan Tomov     // Define element sizes for dofs/quad
727f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
737f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
747f5b9731SStan Tomov 
757f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
76868539c2SNatalie Beams     //  component                        component
77868539c2SNatalie Beams     //    elem                             elem
787f5b9731SStan Tomov     //       node                            node
797f5b9731SStan Tomov 
807f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
817f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
827f5b9731SStan Tomov 
837f5b9731SStan Tomov     // Element strides
84868539c2SNatalie Beams     CeedInt u_elstride = eldofssize;
857f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
867f5b9731SStan Tomov     // Component strides
87868539c2SNatalie Beams     CeedInt u_compstride = nelem * eldofssize;
887f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
897f5b9731SStan Tomov 
907f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
917f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
927f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
937f5b9731SStan Tomov       // Element strides
94868539c2SNatalie Beams       v_elstride = eldofssize;
957f5b9731SStan Tomov       u_elstride = elquadsize;
967f5b9731SStan Tomov       // Component strides
97868539c2SNatalie Beams       v_compstride = nelem * eldofssize;
987f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
997f5b9731SStan Tomov     }
1007f5b9731SStan Tomov 
101e0582403Sabdelfattah83     ierr = magma_interp(P, Q, dim, ncomp,
1027f5b9731SStan Tomov                         impl->dinterp1d, tmode,
103868539c2SNatalie Beams                         u, u_elstride, u_compstride,
104868539c2SNatalie Beams                         v, v_elstride, v_compstride,
105e0582403Sabdelfattah83                         nelem, data->basis_kernel_mode, data->maxthreads,
106e0582403Sabdelfattah83                         data->queue);
107e0582403Sabdelfattah83     if (ierr != 0) CeedError(ceed, 1,
108e0582403Sabdelfattah83                                "MAGMA: launch failure detected for magma_interp");
1097f5b9731SStan Tomov   }
1103513a710Sjeremylt   break;
1113513a710Sjeremylt   case CEED_EVAL_GRAD: {
1127f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
1137f5b9731SStan Tomov     // In CEED_NOTRANSPOSE mode:
1147f5b9731SStan Tomov     // u is (P^dim x nc), column-major layout (nc = ncomp)
1157f5b9731SStan Tomov     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
1167f5b9731SStan Tomov     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
1177f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1187f5b9731SStan Tomov       P = Q1d, Q = P1d;
1197f5b9731SStan Tomov     }
1207f5b9731SStan Tomov 
1217f5b9731SStan Tomov     // Define element sizes for dofs/quad
1227f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
1237f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
1247f5b9731SStan Tomov 
1257f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
1267f5b9731SStan Tomov     //                                  dim
127868539c2SNatalie Beams     //  component                        component
128868539c2SNatalie Beams     //    elem                              elem
1297f5b9731SStan Tomov     //       node                            node
1307f5b9731SStan Tomov 
1317f5b9731SStan Tomov 
1327f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
1337f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
1347f5b9731SStan Tomov 
1357f5b9731SStan Tomov     // Element strides
136868539c2SNatalie Beams     CeedInt u_elstride = eldofssize;
1377f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
1387f5b9731SStan Tomov     // Component strides
139868539c2SNatalie Beams     CeedInt u_compstride = nelem * eldofssize;
1407f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
1417f5b9731SStan Tomov     // Dimension strides
1427f5b9731SStan Tomov     CeedInt u_dimstride = 0;
1437f5b9731SStan Tomov     CeedInt v_dimstride = nelem * elquadsize * ncomp;
1447f5b9731SStan Tomov 
1457f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
1467f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1477f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
1487f5b9731SStan Tomov       // Element strides
149868539c2SNatalie Beams       v_elstride = eldofssize;
1507f5b9731SStan Tomov       u_elstride = elquadsize;
1517f5b9731SStan Tomov       // Component strides
152868539c2SNatalie Beams       v_compstride = nelem * eldofssize;
1537f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
1547f5b9731SStan Tomov       // Dimension strides
1557f5b9731SStan Tomov       v_dimstride = 0;
1567f5b9731SStan Tomov       u_dimstride = nelem * elquadsize * ncomp;
1577f5b9731SStan Tomov 
1587f5b9731SStan Tomov     }
1597f5b9731SStan Tomov 
160e0582403Sabdelfattah83     ierr = magma_grad( P, Q, dim, ncomp,
1617f5b9731SStan Tomov                        impl->dinterp1d, impl->dgrad1d, tmode,
162e0582403Sabdelfattah83                        u, u_elstride, u_compstride, u_dimstride,
163e0582403Sabdelfattah83                        v, v_elstride, v_compstride, v_dimstride,
164e0582403Sabdelfattah83                        nelem, data->basis_kernel_mode, data->maxthreads,
165e0582403Sabdelfattah83                        data->queue);
166e0582403Sabdelfattah83     if (ierr != 0) CeedError(ceed, 1,
167e0582403Sabdelfattah83                                "MAGMA: launch failure detected for magma_grad");
1687f5b9731SStan Tomov   }
1693513a710Sjeremylt   break;
1703513a710Sjeremylt   case CEED_EVAL_WEIGHT: {
1717f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE)
1727f5b9731SStan Tomov       // LCOV_EXCL_START
1737f5b9731SStan Tomov       return CeedError(ceed, 1,
1747f5b9731SStan Tomov                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
1757f5b9731SStan Tomov     // LCOV_EXCL_STOP
1767f5b9731SStan Tomov     CeedInt Q = Q1d;
1777f5b9731SStan Tomov     int eldofssize = CeedIntPow(Q, dim);
178e0582403Sabdelfattah83     ierr = magma_weight(Q, dim, impl->dqweight1d, v, eldofssize, nelem,
179e0582403Sabdelfattah83                         data->basis_kernel_mode, data->maxthreads, data->queue);
180e0582403Sabdelfattah83     if (ierr != 0) CeedError(ceed, 1,
181e0582403Sabdelfattah83                                "MAGMA: launch failure detected for magma_weight");
1827f5b9731SStan Tomov   }
1833513a710Sjeremylt   break;
1843513a710Sjeremylt   // LCOV_EXCL_START
1853513a710Sjeremylt   case CEED_EVAL_DIV:
1863513a710Sjeremylt     return CeedError(ceed, 1, "CEED_EVAL_DIV not supported");
1873513a710Sjeremylt   case CEED_EVAL_CURL:
1883513a710Sjeremylt     return CeedError(ceed, 1, "CEED_EVAL_CURL not supported");
1893513a710Sjeremylt   case CEED_EVAL_NONE:
1903513a710Sjeremylt     return CeedError(ceed, 1,
1913513a710Sjeremylt                      "CEED_EVAL_NONE does not make sense in this context");
1923513a710Sjeremylt     // LCOV_EXCL_STOP
1933513a710Sjeremylt   }
1947f5b9731SStan Tomov 
195e0582403Sabdelfattah83   // must sync to ensure completeness
196e0582403Sabdelfattah83   ceed_magma_queue_sync( data->queue );
197e0582403Sabdelfattah83 
1987f5b9731SStan Tomov   if (emode!=CEED_EVAL_WEIGHT) {
1997f5b9731SStan Tomov     ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr);
2007f5b9731SStan Tomov   }
2017f5b9731SStan Tomov   ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr);
2027f5b9731SStan Tomov   return 0;
2037f5b9731SStan Tomov }
2047f5b9731SStan Tomov 
2057f5b9731SStan Tomov #ifdef __cplusplus
2067f5b9731SStan Tomov CEED_INTERN "C"
2077f5b9731SStan Tomov #endif
208868539c2SNatalie Beams int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt nelem,
209868539c2SNatalie Beams                                   CeedTransposeMode tmode, CeedEvalMode emode,
210868539c2SNatalie Beams                                   CeedVector U, CeedVector V) {
211868539c2SNatalie Beams   int ierr;
212868539c2SNatalie Beams   Ceed ceed;
213868539c2SNatalie Beams   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
214e0582403Sabdelfattah83 
215e0582403Sabdelfattah83   Ceed_Magma *data;
216*777ff853SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChk(ierr);
217e0582403Sabdelfattah83 
218868539c2SNatalie Beams   CeedInt dim, ncomp, ndof, nqpt;
219868539c2SNatalie Beams   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
220868539c2SNatalie Beams   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
221868539c2SNatalie Beams   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr);
222868539c2SNatalie Beams   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr);
223868539c2SNatalie Beams   const CeedScalar *du;
224868539c2SNatalie Beams   CeedScalar *dv;
225868539c2SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
226868539c2SNatalie Beams     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChk(ierr);
227868539c2SNatalie Beams   } else if (emode != CEED_EVAL_WEIGHT) {
228868539c2SNatalie Beams     // LCOV_EXCL_START
229868539c2SNatalie Beams     return CeedError(ceed, 1,
230868539c2SNatalie Beams                      "An input vector is required for this CeedEvalMode");
231868539c2SNatalie Beams     // LCOV_EXCL_STOP
232868539c2SNatalie Beams   }
233868539c2SNatalie Beams   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChk(ierr);
234868539c2SNatalie Beams 
235868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
236*777ff853SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChk(ierr);
237868539c2SNatalie Beams 
238868539c2SNatalie Beams   CeedDebug("\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d",
239868539c2SNatalie Beams             ncomp*ndof, ncomp);
240868539c2SNatalie Beams 
241868539c2SNatalie Beams   if (tmode == CEED_TRANSPOSE) {
242868539c2SNatalie Beams     CeedInt length;
243868539c2SNatalie Beams     ierr = CeedVectorGetLength(V, &length);
244e0582403Sabdelfattah83     magmablas_dlaset(MagmaFull, length, 1, 0., 0., dv, length, data->queue);
245e0582403Sabdelfattah83     ceed_magma_queue_sync( data->queue );
246868539c2SNatalie Beams   }
247868539c2SNatalie Beams   switch (emode) {
248868539c2SNatalie Beams   case CEED_EVAL_INTERP: {
249868539c2SNatalie Beams     CeedInt P = ndof, Q = nqpt;
250868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE)
251e0582403Sabdelfattah83       magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans,
252868539c2SNatalie Beams                             P, nelem*ncomp, Q,
253868539c2SNatalie Beams                             1.0, impl->dinterp, P,
254868539c2SNatalie Beams                             du, Q,
255e0582403Sabdelfattah83                             0.0, dv, P, data->queue);
256868539c2SNatalie Beams     else
257e0582403Sabdelfattah83       magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans,
258868539c2SNatalie Beams                             Q, nelem*ncomp, P,
259868539c2SNatalie Beams                             1.0, impl->dinterp, P,
260868539c2SNatalie Beams                             du, P,
261e0582403Sabdelfattah83                             0.0, dv, Q, data->queue);
262868539c2SNatalie Beams   }
263868539c2SNatalie Beams   break;
264868539c2SNatalie Beams 
265868539c2SNatalie Beams   case CEED_EVAL_GRAD: {
266868539c2SNatalie Beams     CeedInt P = ndof, Q = nqpt;
267868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE) {
268868539c2SNatalie Beams       double beta = 0.0;
269868539c2SNatalie Beams       for(int d=0; d<dim; d++) {
270868539c2SNatalie Beams         if (d>0)
271868539c2SNatalie Beams           beta = 1.0;
272e0582403Sabdelfattah83         magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans,
273868539c2SNatalie Beams                               P, nelem*ncomp, Q,
274868539c2SNatalie Beams                               1.0, impl->dgrad + d*P*Q, P,
275868539c2SNatalie Beams                               du + d*nelem*ncomp*Q, Q,
276e0582403Sabdelfattah83                               beta, dv, P, data->queue);
277868539c2SNatalie Beams       }
278868539c2SNatalie Beams     } else {
279868539c2SNatalie Beams       for(int d=0; d< dim; d++)
280e0582403Sabdelfattah83         magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans,
281868539c2SNatalie Beams                               Q, nelem*ncomp, P,
282868539c2SNatalie Beams                               1.0, impl->dgrad + d*P*Q, P,
283868539c2SNatalie Beams                               du, P,
284e0582403Sabdelfattah83                               0.0, dv + d*nelem*ncomp*Q, Q, data->queue);
285868539c2SNatalie Beams     }
286868539c2SNatalie Beams   }
287868539c2SNatalie Beams   break;
288868539c2SNatalie Beams 
289868539c2SNatalie Beams   case CEED_EVAL_WEIGHT: {
290868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE)
291868539c2SNatalie Beams       // LCOV_EXCL_START
292868539c2SNatalie Beams       return CeedError(ceed, 1,
293868539c2SNatalie Beams                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
294868539c2SNatalie Beams     // LCOV_EXCL_STOP
295868539c2SNatalie Beams 
296868539c2SNatalie Beams     int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1;
297868539c2SNatalie Beams     int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)?
298868539c2SNatalie Beams                                        1 : 0 );
299e0582403Sabdelfattah83     magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv,
300e0582403Sabdelfattah83                            data->queue);
301868539c2SNatalie Beams     CeedChk(ierr);
302868539c2SNatalie Beams   }
303868539c2SNatalie Beams   break;
304868539c2SNatalie Beams 
305868539c2SNatalie Beams   // LCOV_EXCL_START
306868539c2SNatalie Beams   case CEED_EVAL_DIV:
307868539c2SNatalie Beams     return CeedError(ceed, 1, "CEED_EVAL_DIV not supported");
308868539c2SNatalie Beams   case CEED_EVAL_CURL:
309868539c2SNatalie Beams     return CeedError(ceed, 1, "CEED_EVAL_CURL not supported");
310868539c2SNatalie Beams   case CEED_EVAL_NONE:
311868539c2SNatalie Beams     return CeedError(ceed, 1,
312868539c2SNatalie Beams                      "CEED_EVAL_NONE does not make sense in this context");
313868539c2SNatalie Beams     // LCOV_EXCL_STOP
314868539c2SNatalie Beams   }
315868539c2SNatalie Beams 
316e0582403Sabdelfattah83   // must sync to ensure completeness
317e0582403Sabdelfattah83   ceed_magma_queue_sync( data->queue );
318e0582403Sabdelfattah83 
319868539c2SNatalie Beams   if (emode!=CEED_EVAL_WEIGHT) {
320868539c2SNatalie Beams     ierr = CeedVectorRestoreArrayRead(U, &du); CeedChk(ierr);
321868539c2SNatalie Beams   }
322868539c2SNatalie Beams   ierr = CeedVectorRestoreArray(V, &dv); CeedChk(ierr);
323868539c2SNatalie Beams   return 0;
324868539c2SNatalie Beams }
325868539c2SNatalie Beams 
326868539c2SNatalie Beams #ifdef __cplusplus
327868539c2SNatalie Beams CEED_INTERN "C"
328868539c2SNatalie Beams #endif
3293513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) {
3307f5b9731SStan Tomov   int ierr;
3317f5b9731SStan Tomov   CeedBasis_Magma *impl;
332*777ff853SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChk(ierr);
3337f5b9731SStan Tomov 
3347f5b9731SStan Tomov   ierr = magma_free(impl->dqref1d); CeedChk(ierr);
3357f5b9731SStan Tomov   ierr = magma_free(impl->dinterp1d); CeedChk(ierr);
3367f5b9731SStan Tomov   ierr = magma_free(impl->dgrad1d); CeedChk(ierr);
3377f5b9731SStan Tomov   ierr = magma_free(impl->dqweight1d); CeedChk(ierr);
3387f5b9731SStan Tomov 
3397f5b9731SStan Tomov   ierr = CeedFree(&impl); CeedChk(ierr);
3407f5b9731SStan Tomov 
3417f5b9731SStan Tomov   return 0;
3427f5b9731SStan Tomov }
3437f5b9731SStan Tomov 
3447f5b9731SStan Tomov #ifdef __cplusplus
3457f5b9731SStan Tomov CEED_INTERN "C"
3467f5b9731SStan Tomov #endif
347868539c2SNatalie Beams int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) {
348868539c2SNatalie Beams   int ierr;
349868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
350*777ff853SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChk(ierr);
351868539c2SNatalie Beams 
352868539c2SNatalie Beams   ierr = magma_free(impl->dqref); CeedChk(ierr);
353868539c2SNatalie Beams   ierr = magma_free(impl->dinterp); CeedChk(ierr);
354868539c2SNatalie Beams   ierr = magma_free(impl->dgrad); CeedChk(ierr);
355868539c2SNatalie Beams   ierr = magma_free(impl->dqweight); CeedChk(ierr);
356868539c2SNatalie Beams 
357868539c2SNatalie Beams   ierr = CeedFree(&impl); CeedChk(ierr);
358868539c2SNatalie Beams 
359868539c2SNatalie Beams   return 0;
360868539c2SNatalie Beams }
361868539c2SNatalie Beams 
362868539c2SNatalie Beams #ifdef __cplusplus
363868539c2SNatalie Beams CEED_INTERN "C"
364868539c2SNatalie Beams #endif
3653513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d,
3663513a710Sjeremylt                                   const CeedScalar *interp1d,
3677f5b9731SStan Tomov                                   const CeedScalar *grad1d,
3687f5b9731SStan Tomov                                   const CeedScalar *qref1d,
3693513a710Sjeremylt                                   const CeedScalar *qweight1d, CeedBasis basis) {
3707f5b9731SStan Tomov   int ierr;
3717f5b9731SStan Tomov   CeedBasis_Magma *impl;
3727f5b9731SStan Tomov   Ceed ceed;
3737f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
3747f5b9731SStan Tomov 
375e0582403Sabdelfattah83   Ceed_Magma *data;
376*777ff853SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChk(ierr);
377e0582403Sabdelfattah83 
3787f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
3797f5b9731SStan Tomov                                 CeedBasisApply_Magma); CeedChk(ierr);
3807f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
3817f5b9731SStan Tomov                                 CeedBasisDestroy_Magma); CeedChk(ierr);
3827f5b9731SStan Tomov 
3837f5b9731SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
384*777ff853SJeremy L Thompson   ierr = CeedBasisSetData(basis, impl); CeedChk(ierr);
3857f5b9731SStan Tomov 
3867f5b9731SStan Tomov   // Copy qref1d to the GPU
3877f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0]));
3887f5b9731SStan Tomov   CeedChk(ierr);
389e0582403Sabdelfattah83   magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1,
390e0582403Sabdelfattah83                   data->queue);
3917f5b9731SStan Tomov 
3927f5b9731SStan Tomov   // Copy interp1d to the GPU
3937f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0]));
3947f5b9731SStan Tomov   CeedChk(ierr);
395e0582403Sabdelfattah83   magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1,
396e0582403Sabdelfattah83                   data->queue);
3977f5b9731SStan Tomov 
3987f5b9731SStan Tomov   // Copy grad1d to the GPU
3997f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0]));
4007f5b9731SStan Tomov   CeedChk(ierr);
401e0582403Sabdelfattah83   magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1,
402e0582403Sabdelfattah83                   data->queue);
4037f5b9731SStan Tomov 
4047f5b9731SStan Tomov   // Copy qweight1d to the GPU
4057f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0]));
4067f5b9731SStan Tomov   CeedChk(ierr);
407e0582403Sabdelfattah83   magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1,
408e0582403Sabdelfattah83                   data->queue);
4097f5b9731SStan Tomov 
4107f5b9731SStan Tomov   return 0;
4117f5b9731SStan Tomov }
4127f5b9731SStan Tomov 
4137f5b9731SStan Tomov #ifdef __cplusplus
4147f5b9731SStan Tomov CEED_INTERN "C"
4157f5b9731SStan Tomov #endif
4163513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof,
4173513a710Sjeremylt                             CeedInt nqpts, const CeedScalar *interp,
4183513a710Sjeremylt                             const CeedScalar *grad, const CeedScalar *qref,
4193513a710Sjeremylt                             const CeedScalar *qweight, CeedBasis basis) {
4207f5b9731SStan Tomov   int ierr;
421868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
4227f5b9731SStan Tomov   Ceed ceed;
4237f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
4247f5b9731SStan Tomov 
425e0582403Sabdelfattah83   Ceed_Magma *data;
426*777ff853SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChk(ierr);
427e0582403Sabdelfattah83 
428868539c2SNatalie Beams   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
429868539c2SNatalie Beams                                 CeedBasisApplyNonTensor_Magma); CeedChk(ierr);
430868539c2SNatalie Beams   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
431868539c2SNatalie Beams                                 CeedBasisDestroyNonTensor_Magma); CeedChk(ierr);
432868539c2SNatalie Beams 
433868539c2SNatalie Beams   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
434*777ff853SJeremy L Thompson   ierr = CeedBasisSetData(basis, impl); CeedChk(ierr);
435868539c2SNatalie Beams 
436868539c2SNatalie Beams   // Copy qref to the GPU
437868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dqref, nqpts*sizeof(qref[0]));
438868539c2SNatalie Beams   CeedChk(ierr);
439e0582403Sabdelfattah83   magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1, data->queue);
440868539c2SNatalie Beams 
441868539c2SNatalie Beams   // Copy interp to the GPU
442868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0]));
443868539c2SNatalie Beams   CeedChk(ierr);
444e0582403Sabdelfattah83   magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1,
445e0582403Sabdelfattah83                   data->queue);
446868539c2SNatalie Beams 
447868539c2SNatalie Beams   // Copy grad to the GPU
448868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0]));
449868539c2SNatalie Beams   CeedChk(ierr);
450e0582403Sabdelfattah83   magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), grad, 1, impl->dgrad, 1,
451e0582403Sabdelfattah83                   data->queue);
452868539c2SNatalie Beams 
453868539c2SNatalie Beams   // Copy qweight to the GPU
454868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0]));
455868539c2SNatalie Beams   CeedChk(ierr);
456e0582403Sabdelfattah83   magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1,
457e0582403Sabdelfattah83                   data->queue);
458868539c2SNatalie Beams 
459868539c2SNatalie Beams   return 0;
4607f5b9731SStan Tomov }
461