xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma-basis.c (revision 3513a71063a013a065fe70dec49cba820e0fc134)
17f5b9731SStan Tomov // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
27f5b9731SStan Tomov // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
37f5b9731SStan Tomov // All Rights reserved. See files LICENSE and NOTICE for details.
47f5b9731SStan Tomov //
57f5b9731SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software
67f5b9731SStan Tomov // libraries and APIs for efficient high-order finite element and spectral
77f5b9731SStan Tomov // element discretizations for exascale applications. For more information and
87f5b9731SStan Tomov // source code availability see http://github.com/ceed.
97f5b9731SStan Tomov //
107f5b9731SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
117f5b9731SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office
127f5b9731SStan Tomov // of Science and the National Nuclear Security Administration) responsible for
137f5b9731SStan Tomov // the planning and preparation of a capable exascale ecosystem, including
147f5b9731SStan Tomov // software, applications, hardware, advanced system engineering and early
157f5b9731SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative.
167f5b9731SStan Tomov 
177f5b9731SStan Tomov #include "ceed-magma.h"
187f5b9731SStan Tomov 
197f5b9731SStan Tomov #ifdef __cplusplus
207f5b9731SStan Tomov CEED_INTERN "C"
217f5b9731SStan Tomov #endif
227f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
237f5b9731SStan Tomov                          CeedTransposeMode tmode, CeedEvalMode emode,
24*3513a710Sjeremylt                          CeedVector U, CeedVector V) {
257f5b9731SStan Tomov   int ierr;
267f5b9731SStan Tomov   Ceed ceed;
277f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
287f5b9731SStan Tomov   CeedInt dim, ncomp, ndof, nqpt;
297f5b9731SStan Tomov   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
307f5b9731SStan Tomov   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
317f5b9731SStan Tomov   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr);
327f5b9731SStan Tomov   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr);
337f5b9731SStan Tomov   const CeedScalar *u;
347f5b9731SStan Tomov   CeedScalar *v;
357f5b9731SStan Tomov   if (U) {
367f5b9731SStan Tomov     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChk(ierr);
377f5b9731SStan Tomov   } else if (emode != CEED_EVAL_WEIGHT) {
387f5b9731SStan Tomov     // LCOV_EXCL_START
397f5b9731SStan Tomov     return CeedError(ceed, 1,
407f5b9731SStan Tomov                      "An input vector is required for this CeedEvalMode");
417f5b9731SStan Tomov     // LCOV_EXCL_STOP
427f5b9731SStan Tomov   }
437f5b9731SStan Tomov   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChk(ierr);
447f5b9731SStan Tomov 
457f5b9731SStan Tomov   CeedBasis_Magma *impl;
467f5b9731SStan Tomov   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
477f5b9731SStan Tomov 
487f5b9731SStan Tomov   CeedInt P1d, Q1d;
497f5b9731SStan Tomov   ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr);
507f5b9731SStan Tomov   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr);
517f5b9731SStan Tomov 
527f5b9731SStan Tomov   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d",
537f5b9731SStan Tomov             ncomp*CeedIntPow(P1d, dim), ncomp);
547f5b9731SStan Tomov 
557f5b9731SStan Tomov   if (tmode == CEED_TRANSPOSE) {
567f5b9731SStan Tomov     CeedInt length;
577f5b9731SStan Tomov     ierr = CeedVectorGetLength(V, &length);
587f5b9731SStan Tomov     magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length);
597f5b9731SStan Tomov   }
60*3513a710Sjeremylt   switch (emode) {
61*3513a710Sjeremylt   case CEED_EVAL_INTERP: {
627f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
637f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
647f5b9731SStan Tomov       P = Q1d; Q = P1d;
657f5b9731SStan Tomov     }
667f5b9731SStan Tomov 
677f5b9731SStan Tomov     // Define element sizes for dofs/quad
687f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
697f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
707f5b9731SStan Tomov 
717f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
727f5b9731SStan Tomov     //  elem                            component
737f5b9731SStan Tomov     //    component                        elem
747f5b9731SStan Tomov     //       node                            node
757f5b9731SStan Tomov 
767f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
777f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
787f5b9731SStan Tomov 
797f5b9731SStan Tomov     // Element strides
807f5b9731SStan Tomov     CeedInt u_elstride = ncomp * eldofssize;
817f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
827f5b9731SStan Tomov     // Component strides
837f5b9731SStan Tomov     CeedInt u_compstride = eldofssize;
847f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
857f5b9731SStan Tomov 
867f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
877f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
887f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
897f5b9731SStan Tomov       // Element strides
907f5b9731SStan Tomov       v_elstride = ncomp * eldofssize;
917f5b9731SStan Tomov       u_elstride = elquadsize;
927f5b9731SStan Tomov       // Component strides
937f5b9731SStan Tomov       v_compstride = eldofssize;
947f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
957f5b9731SStan Tomov     }
967f5b9731SStan Tomov 
977f5b9731SStan Tomov     // Loop through components and apply batch over elements
98*3513a710Sjeremylt     for (CeedInt comp_ctr = 0; comp_ctr < ncomp; comp_ctr++)
997f5b9731SStan Tomov       magmablas_dbasis_apply_batched_eval_interp(P, Q, dim, ncomp,
1007f5b9731SStan Tomov           impl->dinterp1d, tmode,
1017f5b9731SStan Tomov           u + u_compstride * comp_ctr, u_elstride,
1027f5b9731SStan Tomov           v + v_compstride * comp_ctr, v_elstride,
1037f5b9731SStan Tomov           nelem);
1047f5b9731SStan Tomov   }
105*3513a710Sjeremylt   break;
106*3513a710Sjeremylt   case CEED_EVAL_GRAD: {
1077f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
1087f5b9731SStan Tomov     // In CEED_NOTRANSPOSE mode:
1097f5b9731SStan Tomov     // u is (P^dim x nc), column-major layout (nc = ncomp)
1107f5b9731SStan Tomov     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
1117f5b9731SStan Tomov     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
1127f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1137f5b9731SStan Tomov       P = Q1d, Q = P1d;
1147f5b9731SStan Tomov     }
1157f5b9731SStan Tomov 
1167f5b9731SStan Tomov     // Define element sizes for dofs/quad
1177f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
1187f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
1197f5b9731SStan Tomov 
1207f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
1217f5b9731SStan Tomov     //                                  dim
1227f5b9731SStan Tomov     //  elem                             component
1237f5b9731SStan Tomov     //    component                        elem
1247f5b9731SStan Tomov     //       node                            node
1257f5b9731SStan Tomov 
1267f5b9731SStan Tomov 
1277f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
1287f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
1297f5b9731SStan Tomov 
1307f5b9731SStan Tomov     // Element strides
1317f5b9731SStan Tomov     CeedInt u_elstride = ncomp * eldofssize;
1327f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
1337f5b9731SStan Tomov     // Component strides
1347f5b9731SStan Tomov     CeedInt u_compstride = eldofssize;
1357f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
1367f5b9731SStan Tomov     // Dimension strides
1377f5b9731SStan Tomov     CeedInt u_dimstride = 0;
1387f5b9731SStan Tomov     CeedInt v_dimstride = nelem * elquadsize * ncomp;
1397f5b9731SStan Tomov 
1407f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
1417f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1427f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
1437f5b9731SStan Tomov       // Element strides
1447f5b9731SStan Tomov       v_elstride = ncomp * eldofssize;
1457f5b9731SStan Tomov       u_elstride = elquadsize;
1467f5b9731SStan Tomov       // Component strides
1477f5b9731SStan Tomov       v_compstride = eldofssize;
1487f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
1497f5b9731SStan Tomov       // Dimension strides
1507f5b9731SStan Tomov       v_dimstride = 0;
1517f5b9731SStan Tomov       u_dimstride = nelem * elquadsize * ncomp;
1527f5b9731SStan Tomov 
1537f5b9731SStan Tomov     }
1547f5b9731SStan Tomov 
1557f5b9731SStan Tomov     // Loop through grad dimensions and components, batch call over elements
156*3513a710Sjeremylt     for (CeedInt dim_ctr = 0; dim_ctr < dim; dim_ctr++)
157*3513a710Sjeremylt       for (CeedInt comp_ctr = 0; comp_ctr < ncomp; comp_ctr++)
1587f5b9731SStan Tomov         magmablas_dbasis_apply_batched_eval_grad(P, Q, dim, ncomp, nqpt,
1597f5b9731SStan Tomov             impl->dinterp1d, impl->dgrad1d, tmode,
160*3513a710Sjeremylt             u + dim_ctr * u_dimstride + u_compstride * comp_ctr, u_elstride,
1617f5b9731SStan Tomov             v + dim_ctr * v_dimstride + v_compstride * comp_ctr,
162*3513a710Sjeremylt             v_elstride, nelem, dim_ctr);
1637f5b9731SStan Tomov   }
164*3513a710Sjeremylt   break;
165*3513a710Sjeremylt   case CEED_EVAL_WEIGHT: {
1667f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE)
1677f5b9731SStan Tomov       // LCOV_EXCL_START
1687f5b9731SStan Tomov       return CeedError(ceed, 1,
1697f5b9731SStan Tomov                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
1707f5b9731SStan Tomov     // LCOV_EXCL_STOP
1717f5b9731SStan Tomov     CeedInt Q = Q1d;
1727f5b9731SStan Tomov     int eldofssize = CeedIntPow(Q, dim);
1737f5b9731SStan Tomov     magmablas_dbasis_apply_batched_eval_weight(Q, dim, impl->dqweight1d,
1747f5b9731SStan Tomov         v, eldofssize,
1757f5b9731SStan Tomov         nelem);
1767f5b9731SStan Tomov   }
177*3513a710Sjeremylt   break;
178*3513a710Sjeremylt   // LCOV_EXCL_START
179*3513a710Sjeremylt   case CEED_EVAL_DIV:
180*3513a710Sjeremylt     return CeedError(ceed, 1, "CEED_EVAL_DIV not supported");
181*3513a710Sjeremylt   case CEED_EVAL_CURL:
182*3513a710Sjeremylt     return CeedError(ceed, 1, "CEED_EVAL_CURL not supported");
183*3513a710Sjeremylt   case CEED_EVAL_NONE:
184*3513a710Sjeremylt     return CeedError(ceed, 1,
185*3513a710Sjeremylt                      "CEED_EVAL_NONE does not make sense in this context");
186*3513a710Sjeremylt     // LCOV_EXCL_STOP
187*3513a710Sjeremylt   }
1887f5b9731SStan Tomov 
1897f5b9731SStan Tomov   if (emode!=CEED_EVAL_WEIGHT) {
1907f5b9731SStan Tomov     ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr);
1917f5b9731SStan Tomov   }
1927f5b9731SStan Tomov   ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr);
1937f5b9731SStan Tomov   return 0;
1947f5b9731SStan Tomov }
1957f5b9731SStan Tomov 
1967f5b9731SStan Tomov #ifdef __cplusplus
1977f5b9731SStan Tomov CEED_INTERN "C"
1987f5b9731SStan Tomov #endif
199*3513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) {
2007f5b9731SStan Tomov   int ierr;
2017f5b9731SStan Tomov   CeedBasis_Magma *impl;
2027f5b9731SStan Tomov   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
2037f5b9731SStan Tomov 
2047f5b9731SStan Tomov   ierr = magma_free(impl->dqref1d); CeedChk(ierr);
2057f5b9731SStan Tomov   ierr = magma_free(impl->dinterp1d); CeedChk(ierr);
2067f5b9731SStan Tomov   ierr = magma_free(impl->dgrad1d); CeedChk(ierr);
2077f5b9731SStan Tomov   ierr = magma_free(impl->dqweight1d); CeedChk(ierr);
2087f5b9731SStan Tomov 
2097f5b9731SStan Tomov   ierr = CeedFree(&impl); CeedChk(ierr);
2107f5b9731SStan Tomov 
2117f5b9731SStan Tomov   return 0;
2127f5b9731SStan Tomov }
2137f5b9731SStan Tomov 
2147f5b9731SStan Tomov #ifdef __cplusplus
2157f5b9731SStan Tomov CEED_INTERN "C"
2167f5b9731SStan Tomov #endif
217*3513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d,
218*3513a710Sjeremylt                                   const CeedScalar *interp1d,
2197f5b9731SStan Tomov                                   const CeedScalar *grad1d,
2207f5b9731SStan Tomov                                   const CeedScalar *qref1d,
221*3513a710Sjeremylt                                   const CeedScalar *qweight1d, CeedBasis basis) {
2227f5b9731SStan Tomov   int ierr;
2237f5b9731SStan Tomov   CeedBasis_Magma *impl;
2247f5b9731SStan Tomov   Ceed ceed;
2257f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
2267f5b9731SStan Tomov 
2277f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
2287f5b9731SStan Tomov                                 CeedBasisApply_Magma); CeedChk(ierr);
2297f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
2307f5b9731SStan Tomov                                 CeedBasisDestroy_Magma); CeedChk(ierr);
2317f5b9731SStan Tomov 
2327f5b9731SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
2337f5b9731SStan Tomov   ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr);
2347f5b9731SStan Tomov 
2357f5b9731SStan Tomov   // Copy qref1d to the GPU
2367f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0]));
2377f5b9731SStan Tomov   CeedChk(ierr);
2387f5b9731SStan Tomov   magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1);
2397f5b9731SStan Tomov 
2407f5b9731SStan Tomov   // Copy interp1d to the GPU
2417f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0]));
2427f5b9731SStan Tomov   CeedChk(ierr);
2437f5b9731SStan Tomov   magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1);
2447f5b9731SStan Tomov 
2457f5b9731SStan Tomov   // Copy grad1d to the GPU
2467f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0]));
2477f5b9731SStan Tomov   CeedChk(ierr);
2487f5b9731SStan Tomov   magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1);
2497f5b9731SStan Tomov 
2507f5b9731SStan Tomov   // Copy qweight1d to the GPU
2517f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0]));
2527f5b9731SStan Tomov   CeedChk(ierr);
2537f5b9731SStan Tomov   magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1);
2547f5b9731SStan Tomov 
2557f5b9731SStan Tomov   return 0;
2567f5b9731SStan Tomov }
2577f5b9731SStan Tomov 
2587f5b9731SStan Tomov #ifdef __cplusplus
2597f5b9731SStan Tomov CEED_INTERN "C"
2607f5b9731SStan Tomov #endif
261*3513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof,
262*3513a710Sjeremylt                             CeedInt nqpts, const CeedScalar *interp,
263*3513a710Sjeremylt                             const CeedScalar *grad, const CeedScalar *qref,
264*3513a710Sjeremylt                             const CeedScalar *qweight, CeedBasis basis) {
2657f5b9731SStan Tomov   int ierr;
2667f5b9731SStan Tomov   Ceed ceed;
2677f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
2687f5b9731SStan Tomov 
2697f5b9731SStan Tomov   return CeedError(ceed, 1, "Backend does not implement non-tensor bases");
2707f5b9731SStan Tomov }
271