xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma-basis.c (revision e15f9bd09af0280c89b79924fa9af7dd2e3e30be)
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
167f5b9731SStan Tomov 
173d576824SJeremy L Thompson #include <ceed.h>
183d576824SJeremy L Thompson #include <ceed-backend.h>
197f5b9731SStan Tomov #include "ceed-magma.h"
207f5b9731SStan Tomov 
217f5b9731SStan Tomov #ifdef __cplusplus
227f5b9731SStan Tomov CEED_INTERN "C"
237f5b9731SStan Tomov #endif
247f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
257f5b9731SStan Tomov                          CeedTransposeMode tmode, CeedEvalMode emode,
263513a710Sjeremylt                          CeedVector U, CeedVector V) {
277f5b9731SStan Tomov   int ierr;
287f5b9731SStan Tomov   Ceed ceed;
29*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
30e0582403Sabdelfattah83   CeedInt dim, ncomp, ndof;
31*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
32*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
33*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr);
34e0582403Sabdelfattah83 
35e0582403Sabdelfattah83   Ceed_Magma *data;
36*e15f9bd0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
37e0582403Sabdelfattah83 
387f5b9731SStan Tomov   const CeedScalar *u;
397f5b9731SStan Tomov   CeedScalar *v;
40868539c2SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
41*e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChkBackend(ierr);
427f5b9731SStan Tomov   } else if (emode != CEED_EVAL_WEIGHT) {
437f5b9731SStan Tomov     // LCOV_EXCL_START
44*e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
457f5b9731SStan Tomov                      "An input vector is required for this CeedEvalMode");
467f5b9731SStan Tomov     // LCOV_EXCL_STOP
477f5b9731SStan Tomov   }
48*e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChkBackend(ierr);
497f5b9731SStan Tomov 
507f5b9731SStan Tomov   CeedBasis_Magma *impl;
51*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
527f5b9731SStan Tomov 
537f5b9731SStan Tomov   CeedInt P1d, Q1d;
54*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChkBackend(ierr);
55*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChkBackend(ierr);
567f5b9731SStan Tomov 
577f5b9731SStan Tomov   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d",
587f5b9731SStan Tomov             ncomp*CeedIntPow(P1d, dim), ncomp);
597f5b9731SStan Tomov 
607f5b9731SStan Tomov   if (tmode == CEED_TRANSPOSE) {
617f5b9731SStan Tomov     CeedInt length;
62*e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetLength(V, &length); CeedChkBackend(ierr);
63e0582403Sabdelfattah83     magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length, data->queue);
64e0582403Sabdelfattah83     ceed_magma_queue_sync( data->queue );
657f5b9731SStan Tomov   }
663513a710Sjeremylt   switch (emode) {
673513a710Sjeremylt   case CEED_EVAL_INTERP: {
687f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
697f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
707f5b9731SStan Tomov       P = Q1d; Q = P1d;
717f5b9731SStan Tomov     }
727f5b9731SStan Tomov 
737f5b9731SStan Tomov     // Define element sizes for dofs/quad
747f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
757f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
767f5b9731SStan Tomov 
777f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
78868539c2SNatalie Beams     //  component                        component
79868539c2SNatalie Beams     //    elem                             elem
807f5b9731SStan Tomov     //       node                            node
817f5b9731SStan Tomov 
827f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
837f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
847f5b9731SStan Tomov 
857f5b9731SStan Tomov     // Element strides
86868539c2SNatalie Beams     CeedInt u_elstride = eldofssize;
877f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
887f5b9731SStan Tomov     // Component strides
89868539c2SNatalie Beams     CeedInt u_compstride = nelem * eldofssize;
907f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
917f5b9731SStan Tomov 
927f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
937f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
947f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
957f5b9731SStan Tomov       // Element strides
96868539c2SNatalie Beams       v_elstride = eldofssize;
977f5b9731SStan Tomov       u_elstride = elquadsize;
987f5b9731SStan Tomov       // Component strides
99868539c2SNatalie Beams       v_compstride = nelem * eldofssize;
1007f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
1017f5b9731SStan Tomov     }
1027f5b9731SStan Tomov 
103e0582403Sabdelfattah83     ierr = magma_interp(P, Q, dim, ncomp,
1047f5b9731SStan Tomov                         impl->dinterp1d, tmode,
105868539c2SNatalie Beams                         u, u_elstride, u_compstride,
106868539c2SNatalie Beams                         v, v_elstride, v_compstride,
107e0582403Sabdelfattah83                         nelem, data->basis_kernel_mode, data->maxthreads,
108e0582403Sabdelfattah83                         data->queue);
109*e15f9bd0SJeremy L Thompson     if (ierr != 0) CeedError(ceed, CEED_ERROR_BACKEND,
110e0582403Sabdelfattah83                                "MAGMA: launch failure detected for magma_interp");
1117f5b9731SStan Tomov   }
1123513a710Sjeremylt   break;
1133513a710Sjeremylt   case CEED_EVAL_GRAD: {
1147f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
1157f5b9731SStan Tomov     // In CEED_NOTRANSPOSE mode:
1167f5b9731SStan Tomov     // u is (P^dim x nc), column-major layout (nc = ncomp)
1177f5b9731SStan Tomov     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
1187f5b9731SStan Tomov     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
1197f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1207f5b9731SStan Tomov       P = Q1d, Q = P1d;
1217f5b9731SStan Tomov     }
1227f5b9731SStan Tomov 
1237f5b9731SStan Tomov     // Define element sizes for dofs/quad
1247f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
1257f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
1267f5b9731SStan Tomov 
1277f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
1287f5b9731SStan Tomov     //                                  dim
129868539c2SNatalie Beams     //  component                        component
130868539c2SNatalie Beams     //    elem                              elem
1317f5b9731SStan Tomov     //       node                            node
1327f5b9731SStan Tomov 
1337f5b9731SStan Tomov 
1347f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
1357f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
1367f5b9731SStan Tomov 
1377f5b9731SStan Tomov     // Element strides
138868539c2SNatalie Beams     CeedInt u_elstride = eldofssize;
1397f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
1407f5b9731SStan Tomov     // Component strides
141868539c2SNatalie Beams     CeedInt u_compstride = nelem * eldofssize;
1427f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
1437f5b9731SStan Tomov     // Dimension strides
1447f5b9731SStan Tomov     CeedInt u_dimstride = 0;
1457f5b9731SStan Tomov     CeedInt v_dimstride = nelem * elquadsize * ncomp;
1467f5b9731SStan Tomov 
1477f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
1487f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1497f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
1507f5b9731SStan Tomov       // Element strides
151868539c2SNatalie Beams       v_elstride = eldofssize;
1527f5b9731SStan Tomov       u_elstride = elquadsize;
1537f5b9731SStan Tomov       // Component strides
154868539c2SNatalie Beams       v_compstride = nelem * eldofssize;
1557f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
1567f5b9731SStan Tomov       // Dimension strides
1577f5b9731SStan Tomov       v_dimstride = 0;
1587f5b9731SStan Tomov       u_dimstride = nelem * elquadsize * ncomp;
1597f5b9731SStan Tomov 
1607f5b9731SStan Tomov     }
1617f5b9731SStan Tomov 
162e0582403Sabdelfattah83     ierr = magma_grad( P, Q, dim, ncomp,
1637f5b9731SStan Tomov                        impl->dinterp1d, impl->dgrad1d, tmode,
164e0582403Sabdelfattah83                        u, u_elstride, u_compstride, u_dimstride,
165e0582403Sabdelfattah83                        v, v_elstride, v_compstride, v_dimstride,
166e0582403Sabdelfattah83                        nelem, data->basis_kernel_mode, data->maxthreads,
167e0582403Sabdelfattah83                        data->queue);
168*e15f9bd0SJeremy L Thompson     if (ierr != 0) CeedError(ceed, CEED_ERROR_BACKEND,
169e0582403Sabdelfattah83                                "MAGMA: launch failure detected for magma_grad");
1707f5b9731SStan Tomov   }
1713513a710Sjeremylt   break;
1723513a710Sjeremylt   case CEED_EVAL_WEIGHT: {
1737f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE)
1747f5b9731SStan Tomov       // LCOV_EXCL_START
175*e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
1767f5b9731SStan Tomov                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
1777f5b9731SStan Tomov     // LCOV_EXCL_STOP
1787f5b9731SStan Tomov     CeedInt Q = Q1d;
1797f5b9731SStan Tomov     int eldofssize = CeedIntPow(Q, dim);
180e0582403Sabdelfattah83     ierr = magma_weight(Q, dim, impl->dqweight1d, v, eldofssize, nelem,
181e0582403Sabdelfattah83                         data->basis_kernel_mode, data->maxthreads, data->queue);
182*e15f9bd0SJeremy L Thompson     if (ierr != 0) CeedError(ceed, CEED_ERROR_BACKEND,
183e0582403Sabdelfattah83                                "MAGMA: launch failure detected for magma_weight");
1847f5b9731SStan Tomov   }
1853513a710Sjeremylt   break;
1863513a710Sjeremylt   // LCOV_EXCL_START
1873513a710Sjeremylt   case CEED_EVAL_DIV:
188*e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported");
1893513a710Sjeremylt   case CEED_EVAL_CURL:
190*e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported");
1913513a710Sjeremylt   case CEED_EVAL_NONE:
192*e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
1933513a710Sjeremylt                      "CEED_EVAL_NONE does not make sense in this context");
1943513a710Sjeremylt     // LCOV_EXCL_STOP
1953513a710Sjeremylt   }
1967f5b9731SStan Tomov 
197e0582403Sabdelfattah83   // must sync to ensure completeness
198e0582403Sabdelfattah83   ceed_magma_queue_sync( data->queue );
199e0582403Sabdelfattah83 
2007f5b9731SStan Tomov   if (emode!=CEED_EVAL_WEIGHT) {
201*e15f9bd0SJeremy L Thompson     ierr = CeedVectorRestoreArrayRead(U, &u); CeedChkBackend(ierr);
2027f5b9731SStan Tomov   }
203*e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(V, &v); CeedChkBackend(ierr);
204*e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2057f5b9731SStan Tomov }
2067f5b9731SStan Tomov 
2077f5b9731SStan Tomov #ifdef __cplusplus
2087f5b9731SStan Tomov CEED_INTERN "C"
2097f5b9731SStan Tomov #endif
210868539c2SNatalie Beams int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt nelem,
211868539c2SNatalie Beams                                   CeedTransposeMode tmode, CeedEvalMode emode,
212868539c2SNatalie Beams                                   CeedVector U, CeedVector V) {
213868539c2SNatalie Beams   int ierr;
214868539c2SNatalie Beams   Ceed ceed;
215*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
216e0582403Sabdelfattah83 
217e0582403Sabdelfattah83   Ceed_Magma *data;
218*e15f9bd0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
219e0582403Sabdelfattah83 
220868539c2SNatalie Beams   CeedInt dim, ncomp, ndof, nqpt;
221*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
222*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
223*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr);
224*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChkBackend(ierr);
225868539c2SNatalie Beams   const CeedScalar *du;
226868539c2SNatalie Beams   CeedScalar *dv;
227868539c2SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
228*e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChkBackend(ierr);
229868539c2SNatalie Beams   } else if (emode != CEED_EVAL_WEIGHT) {
230868539c2SNatalie Beams     // LCOV_EXCL_START
231*e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
232868539c2SNatalie Beams                      "An input vector is required for this CeedEvalMode");
233868539c2SNatalie Beams     // LCOV_EXCL_STOP
234868539c2SNatalie Beams   }
235*e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr);
236868539c2SNatalie Beams 
237868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
238*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
239868539c2SNatalie Beams 
240868539c2SNatalie Beams   CeedDebug("\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d",
241868539c2SNatalie Beams             ncomp*ndof, ncomp);
242868539c2SNatalie Beams 
243868539c2SNatalie Beams   if (tmode == CEED_TRANSPOSE) {
244868539c2SNatalie Beams     CeedInt length;
245868539c2SNatalie Beams     ierr = CeedVectorGetLength(V, &length);
246e0582403Sabdelfattah83     magmablas_dlaset(MagmaFull, length, 1, 0., 0., dv, length, data->queue);
247e0582403Sabdelfattah83     ceed_magma_queue_sync( data->queue );
248868539c2SNatalie Beams   }
249868539c2SNatalie Beams   switch (emode) {
250868539c2SNatalie Beams   case CEED_EVAL_INTERP: {
251868539c2SNatalie Beams     CeedInt P = ndof, Q = nqpt;
252868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE)
253e0582403Sabdelfattah83       magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans,
254868539c2SNatalie Beams                             P, nelem*ncomp, Q,
255868539c2SNatalie Beams                             1.0, impl->dinterp, P,
256868539c2SNatalie Beams                             du, Q,
257e0582403Sabdelfattah83                             0.0, dv, P, data->queue);
258868539c2SNatalie Beams     else
259e0582403Sabdelfattah83       magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans,
260868539c2SNatalie Beams                             Q, nelem*ncomp, P,
261868539c2SNatalie Beams                             1.0, impl->dinterp, P,
262868539c2SNatalie Beams                             du, P,
263e0582403Sabdelfattah83                             0.0, dv, Q, data->queue);
264868539c2SNatalie Beams   }
265868539c2SNatalie Beams   break;
266868539c2SNatalie Beams 
267868539c2SNatalie Beams   case CEED_EVAL_GRAD: {
268868539c2SNatalie Beams     CeedInt P = ndof, Q = nqpt;
269868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE) {
270868539c2SNatalie Beams       double beta = 0.0;
271868539c2SNatalie Beams       for(int d=0; d<dim; d++) {
272868539c2SNatalie Beams         if (d>0)
273868539c2SNatalie Beams           beta = 1.0;
274e0582403Sabdelfattah83         magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans,
275868539c2SNatalie Beams                               P, nelem*ncomp, Q,
276868539c2SNatalie Beams                               1.0, impl->dgrad + d*P*Q, P,
277868539c2SNatalie Beams                               du + d*nelem*ncomp*Q, Q,
278e0582403Sabdelfattah83                               beta, dv, P, data->queue);
279868539c2SNatalie Beams       }
280868539c2SNatalie Beams     } else {
281868539c2SNatalie Beams       for(int d=0; d< dim; d++)
282e0582403Sabdelfattah83         magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans,
283868539c2SNatalie Beams                               Q, nelem*ncomp, P,
284868539c2SNatalie Beams                               1.0, impl->dgrad + d*P*Q, P,
285868539c2SNatalie Beams                               du, P,
286e0582403Sabdelfattah83                               0.0, dv + d*nelem*ncomp*Q, Q, data->queue);
287868539c2SNatalie Beams     }
288868539c2SNatalie Beams   }
289868539c2SNatalie Beams   break;
290868539c2SNatalie Beams 
291868539c2SNatalie Beams   case CEED_EVAL_WEIGHT: {
292868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE)
293868539c2SNatalie Beams       // LCOV_EXCL_START
294*e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
295868539c2SNatalie Beams                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
296868539c2SNatalie Beams     // LCOV_EXCL_STOP
297868539c2SNatalie Beams 
298868539c2SNatalie Beams     int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1;
299868539c2SNatalie Beams     int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)?
300868539c2SNatalie Beams                                        1 : 0 );
301e0582403Sabdelfattah83     magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv,
302e0582403Sabdelfattah83                            data->queue);
303*e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
304868539c2SNatalie Beams   }
305868539c2SNatalie Beams   break;
306868539c2SNatalie Beams 
307868539c2SNatalie Beams   // LCOV_EXCL_START
308868539c2SNatalie Beams   case CEED_EVAL_DIV:
309*e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported");
310868539c2SNatalie Beams   case CEED_EVAL_CURL:
311*e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported");
312868539c2SNatalie Beams   case CEED_EVAL_NONE:
313*e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
314868539c2SNatalie Beams                      "CEED_EVAL_NONE does not make sense in this context");
315868539c2SNatalie Beams     // LCOV_EXCL_STOP
316868539c2SNatalie Beams   }
317868539c2SNatalie Beams 
318e0582403Sabdelfattah83   // must sync to ensure completeness
319e0582403Sabdelfattah83   ceed_magma_queue_sync( data->queue );
320e0582403Sabdelfattah83 
321868539c2SNatalie Beams   if (emode!=CEED_EVAL_WEIGHT) {
322*e15f9bd0SJeremy L Thompson     ierr = CeedVectorRestoreArrayRead(U, &du); CeedChkBackend(ierr);
323868539c2SNatalie Beams   }
324*e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(V, &dv); CeedChkBackend(ierr);
325*e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
326868539c2SNatalie Beams }
327868539c2SNatalie Beams 
328868539c2SNatalie Beams #ifdef __cplusplus
329868539c2SNatalie Beams CEED_INTERN "C"
330868539c2SNatalie Beams #endif
3313513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) {
3327f5b9731SStan Tomov   int ierr;
3337f5b9731SStan Tomov   CeedBasis_Magma *impl;
334*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
3357f5b9731SStan Tomov 
336*e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dqref1d); CeedChkBackend(ierr);
337*e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dinterp1d); CeedChkBackend(ierr);
338*e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dgrad1d); CeedChkBackend(ierr);
339*e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dqweight1d); CeedChkBackend(ierr);
3407f5b9731SStan Tomov 
341*e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
3427f5b9731SStan Tomov 
343*e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3447f5b9731SStan Tomov }
3457f5b9731SStan Tomov 
3467f5b9731SStan Tomov #ifdef __cplusplus
3477f5b9731SStan Tomov CEED_INTERN "C"
3487f5b9731SStan Tomov #endif
349868539c2SNatalie Beams int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) {
350868539c2SNatalie Beams   int ierr;
351868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
352*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
353868539c2SNatalie Beams 
354*e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dqref); CeedChkBackend(ierr);
355*e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dinterp); CeedChkBackend(ierr);
356*e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dgrad); CeedChkBackend(ierr);
357*e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dqweight); CeedChkBackend(ierr);
358868539c2SNatalie Beams 
359*e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
360868539c2SNatalie Beams 
361*e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
362868539c2SNatalie Beams }
363868539c2SNatalie Beams 
364868539c2SNatalie Beams #ifdef __cplusplus
365868539c2SNatalie Beams CEED_INTERN "C"
366868539c2SNatalie Beams #endif
3673513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d,
3683513a710Sjeremylt                                   const CeedScalar *interp1d,
3697f5b9731SStan Tomov                                   const CeedScalar *grad1d,
3707f5b9731SStan Tomov                                   const CeedScalar *qref1d,
3713513a710Sjeremylt                                   const CeedScalar *qweight1d, CeedBasis basis) {
3727f5b9731SStan Tomov   int ierr;
3737f5b9731SStan Tomov   CeedBasis_Magma *impl;
3747f5b9731SStan Tomov   Ceed ceed;
375*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
3767f5b9731SStan Tomov 
377c9f8acf2SJeremy L Thompson   // Check for supported parameters
378c9f8acf2SJeremy L Thompson   CeedInt ncomp = 0;
379*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
380c9f8acf2SJeremy L Thompson   if (ncomp > 3)
381c9f8acf2SJeremy L Thompson     // LCOV_EXCL_START
382*e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
383c9f8acf2SJeremy L Thompson                      "Magma backend does not support tensor bases with more than 3 components");
384c9f8acf2SJeremy L Thompson   // LCOV_EXCL_STOP
385c9f8acf2SJeremy L Thompson   if (P1d > 10)
386c9f8acf2SJeremy L Thompson     // LCOV_EXCL_START
387*e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
388c9f8acf2SJeremy L Thompson                      "Magma backend does not support tensor bases with more than 10 nodes in each dimension");
389c9f8acf2SJeremy L Thompson   // LCOV_EXCL_STOP
390c9f8acf2SJeremy L Thompson   if (Q1d > 10)
391c9f8acf2SJeremy L Thompson     // LCOV_EXCL_START
392*e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
393c9f8acf2SJeremy L Thompson                      "Magma backend does not support tensor bases with more than 10 quadrature points in each dimension");
394c9f8acf2SJeremy L Thompson   // LCOV_EXCL_STOP
395c9f8acf2SJeremy L Thompson 
396e0582403Sabdelfattah83   Ceed_Magma *data;
397*e15f9bd0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
398e0582403Sabdelfattah83 
3997f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
400*e15f9bd0SJeremy L Thompson                                 CeedBasisApply_Magma); CeedChkBackend(ierr);
4017f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
402*e15f9bd0SJeremy L Thompson                                 CeedBasisDestroy_Magma); CeedChkBackend(ierr);
4037f5b9731SStan Tomov 
404*e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr);
405*e15f9bd0SJeremy L Thompson   ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr);
4067f5b9731SStan Tomov 
4077f5b9731SStan Tomov   // Copy qref1d to the GPU
4087f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0]));
409*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
410e0582403Sabdelfattah83   magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1,
411e0582403Sabdelfattah83                   data->queue);
4127f5b9731SStan Tomov 
4137f5b9731SStan Tomov   // Copy interp1d to the GPU
4147f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0]));
415*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
416e0582403Sabdelfattah83   magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1,
417e0582403Sabdelfattah83                   data->queue);
4187f5b9731SStan Tomov 
4197f5b9731SStan Tomov   // Copy grad1d to the GPU
4207f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0]));
421*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
422e0582403Sabdelfattah83   magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1,
423e0582403Sabdelfattah83                   data->queue);
4247f5b9731SStan Tomov 
4257f5b9731SStan Tomov   // Copy qweight1d to the GPU
4267f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0]));
427*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
428e0582403Sabdelfattah83   magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1,
429e0582403Sabdelfattah83                   data->queue);
4307f5b9731SStan Tomov 
431*e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4327f5b9731SStan Tomov }
4337f5b9731SStan Tomov 
4347f5b9731SStan Tomov #ifdef __cplusplus
4357f5b9731SStan Tomov CEED_INTERN "C"
4367f5b9731SStan Tomov #endif
4373513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof,
4383513a710Sjeremylt                             CeedInt nqpts, const CeedScalar *interp,
4393513a710Sjeremylt                             const CeedScalar *grad, const CeedScalar *qref,
4403513a710Sjeremylt                             const CeedScalar *qweight, CeedBasis basis) {
4417f5b9731SStan Tomov   int ierr;
442868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
4437f5b9731SStan Tomov   Ceed ceed;
444*e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
4457f5b9731SStan Tomov 
446e0582403Sabdelfattah83   Ceed_Magma *data;
447*e15f9bd0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
448e0582403Sabdelfattah83 
449868539c2SNatalie Beams   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
450*e15f9bd0SJeremy L Thompson                                 CeedBasisApplyNonTensor_Magma); CeedChkBackend(ierr);
451868539c2SNatalie Beams   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
452*e15f9bd0SJeremy L Thompson                                 CeedBasisDestroyNonTensor_Magma); CeedChkBackend(ierr);
453868539c2SNatalie Beams 
454*e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr);
455*e15f9bd0SJeremy L Thompson   ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr);
456868539c2SNatalie Beams 
457868539c2SNatalie Beams   // Copy qref to the GPU
458868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dqref, nqpts*sizeof(qref[0]));
459*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
460e0582403Sabdelfattah83   magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1, data->queue);
461868539c2SNatalie Beams 
462868539c2SNatalie Beams   // Copy interp to the GPU
463868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0]));
464*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
465e0582403Sabdelfattah83   magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1,
466e0582403Sabdelfattah83                   data->queue);
467868539c2SNatalie Beams 
468868539c2SNatalie Beams   // Copy grad to the GPU
469868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0]));
470*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
471e0582403Sabdelfattah83   magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), grad, 1, impl->dgrad, 1,
472e0582403Sabdelfattah83                   data->queue);
473868539c2SNatalie Beams 
474868539c2SNatalie Beams   // Copy qweight to the GPU
475868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0]));
476*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
477e0582403Sabdelfattah83   magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1,
478e0582403Sabdelfattah83                   data->queue);
479868539c2SNatalie Beams 
480*e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4817f5b9731SStan Tomov }
482