// Source: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision 868539c291cd6e4adc5c1e2f0ea123f6c9e198f6)
// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
164444f328STzanio 
// MAGMA functions and backend data types specific to CEED
1890104f39SStan Tomov 
#include <stdbool.h>
#include <string.h>
#include <ceed-backend.h>
#include "magma.h"
225a9ca9adSVeselin Dobrev 
237f5b9731SStan Tomov typedef struct {
247f5b9731SStan Tomov   CeedScalar *dqref1d;
257f5b9731SStan Tomov   CeedScalar *dinterp1d;
267f5b9731SStan Tomov   CeedScalar *dgrad1d;
277f5b9731SStan Tomov   CeedScalar *dqweight1d;
287f5b9731SStan Tomov } CeedBasis_Magma;
297f5b9731SStan Tomov 
307f5b9731SStan Tomov typedef struct {
31*868539c2SNatalie Beams   CeedScalar *dqref;
32*868539c2SNatalie Beams   CeedScalar *dinterp;
33*868539c2SNatalie Beams   CeedScalar *dgrad;
34*868539c2SNatalie Beams   CeedScalar *dqweight;
35*868539c2SNatalie Beams } CeedBasisNonTensor_Magma;
36*868539c2SNatalie Beams 
37*868539c2SNatalie Beams typedef struct {
38*868539c2SNatalie Beams   CeedInt *indices;
39*868539c2SNatalie Beams   CeedInt *dindices;
40*868539c2SNatalie Beams   int  own_;
41*868539c2SNatalie Beams   int down_;            // cover a case where we own Device memory
42*868539c2SNatalie Beams } CeedElemRestriction_Magma;
43*868539c2SNatalie Beams 
44*868539c2SNatalie Beams typedef struct {
457f5b9731SStan Tomov   const CeedScalar **inputs;
467f5b9731SStan Tomov   CeedScalar **outputs;
477f5b9731SStan Tomov   bool setupdone;
487f5b9731SStan Tomov } CeedQFunction_Magma;
497f5b9731SStan Tomov 
// Compile-time feature switches. Each is defined unconditionally and expands
// to nothing; the code paths they select are outside this header.
#define USE_MAGMA_BATCH
#define USE_MAGMA_BATCH2
#define USE_MAGMA_BATCH3
#define USE_MAGMA_BATCH4
5490104f39SStan Tomov 
557f5b9731SStan Tomov #ifdef __cplusplus
567f5b9731SStan Tomov CEED_INTERN {
577f5b9731SStan Tomov #endif
583513a710Sjeremylt   void magmablas_dbasis_apply_batched_eval_interp(magma_int_t P, magma_int_t Q,
59*868539c2SNatalie Beams       magma_int_t dim, magma_int_t ncomp,
60*868539c2SNatalie Beams       const double *dT, CeedTransposeMode tmode,
61*868539c2SNatalie Beams       const double *dU, magma_int_t u_elemstride,
62*868539c2SNatalie Beams       magma_int_t u_compstride,
63*868539c2SNatalie Beams       double *dV, magma_int_t v_elemstride,
64*868539c2SNatalie Beams       magma_int_t v_compstride,
65*868539c2SNatalie Beams       magma_int_t nelem);
667f5b9731SStan Tomov 
673513a710Sjeremylt   void magmablas_dbasis_apply_batched_eval_grad(magma_int_t P, magma_int_t Q,
68*868539c2SNatalie Beams       magma_int_t dim, magma_int_t ncomp,
69*868539c2SNatalie Beams       magma_int_t nqpt, const double* dinterp1d,
70*868539c2SNatalie Beams       const double *dgrad1d, CeedTransposeMode tmode,
71*868539c2SNatalie Beams       const double *dU, magma_int_t u_elemstride,
72*868539c2SNatalie Beams       magma_int_t u_compstride, magma_int_t u_dimstride,
73*868539c2SNatalie Beams       double *dV, magma_int_t v_elemstride,
74*868539c2SNatalie Beams       magma_int_t v_compstride, magma_int_t v_dimstride,
75*868539c2SNatalie Beams       magma_int_t dim_id, magma_int_t nelem);
767f5b9731SStan Tomov 
77*868539c2SNatalie Beams   void magmablas_dbasis_apply_batched_eval_weight(magma_int_t Q, magma_int_t dim,
78*868539c2SNatalie Beams       const double *dqweight1d, double *dV,
79*868539c2SNatalie Beams       magma_int_t v_elemstride,
80*868539c2SNatalie Beams       magma_int_t nelem);
81*868539c2SNatalie Beams 
82*868539c2SNatalie Beams   void magma_weight(magma_int_t grid, magma_int_t threads, magma_int_t nelem,
83*868539c2SNatalie Beams                     magma_int_t Q,
84*868539c2SNatalie Beams                     double *dqweight, double *dv);
85*868539c2SNatalie Beams 
86*868539c2SNatalie Beams   void magma_readDofs(const magma_int_t NCOMP,
87*868539c2SNatalie Beams                       const magma_int_t nnodes,
88*868539c2SNatalie Beams                       const magma_int_t esize,
89*868539c2SNatalie Beams                       const magma_int_t nelem, magma_int_t *indices,
90*868539c2SNatalie Beams                       const double *du, double *dv);
91*868539c2SNatalie Beams 
92*868539c2SNatalie Beams   void magma_readDofsTranspose(const magma_int_t NCOMP,
93*868539c2SNatalie Beams                                const magma_int_t nnodes,
94*868539c2SNatalie Beams                                const magma_int_t esize,
95*868539c2SNatalie Beams                                const magma_int_t nelem, magma_int_t *indices,
96*868539c2SNatalie Beams                                const double *du, double *dv);
97*868539c2SNatalie Beams 
98*868539c2SNatalie Beams   void magma_writeDofs(const magma_int_t NCOMP,
99*868539c2SNatalie Beams                        const magma_int_t nnodes,
100*868539c2SNatalie Beams                        const magma_int_t esize,
101*868539c2SNatalie Beams                        const magma_int_t nelem, magma_int_t *indices,
102*868539c2SNatalie Beams                        const double *du, double *dv);
103*868539c2SNatalie Beams 
104*868539c2SNatalie Beams   void magma_writeDofsTranspose(const magma_int_t NCOMP,
105*868539c2SNatalie Beams                                 const magma_int_t nnodes,
106*868539c2SNatalie Beams                                 const magma_int_t esize,
107*868539c2SNatalie Beams                                 const magma_int_t nelem, magma_int_t *indices,
108*868539c2SNatalie Beams                                 const double *du, double *dv);
1097f5b9731SStan Tomov 
1107f5b9731SStan Tomov   magma_int_t
1117f5b9731SStan Tomov   magma_isdevptr(const void *A);
1127f5b9731SStan Tomov 
113*868539c2SNatalie Beams   int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
114*868539c2SNatalie Beams                                     CeedInt Q1d,
115*868539c2SNatalie Beams                                     const CeedScalar *interp1d,
116*868539c2SNatalie Beams                                     const CeedScalar *grad1d,
117*868539c2SNatalie Beams                                     const CeedScalar *qref1d,
118*868539c2SNatalie Beams                                     const CeedScalar *qweight1d,
119*868539c2SNatalie Beams                                     CeedBasis basis);
1207f5b9731SStan Tomov 
121*868539c2SNatalie Beams   int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
122d4f68153Sjeremylt                               CeedInt ndof, CeedInt nqpts,
123d4f68153Sjeremylt                               const CeedScalar *interp,
124d4f68153Sjeremylt                               const CeedScalar *grad,
125d4f68153Sjeremylt                               const CeedScalar *qref,
126d4f68153Sjeremylt                               const CeedScalar *qweight,
127d4f68153Sjeremylt                               CeedBasis basis);
128*868539c2SNatalie Beams 
129*868539c2SNatalie Beams   int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
130*868539c2SNatalie Beams                                       CeedCopyMode cmode,
131*868539c2SNatalie Beams                                       const CeedInt *indices,
132*868539c2SNatalie Beams                                       CeedElemRestriction r);
133*868539c2SNatalie Beams 
134*868539c2SNatalie Beams   int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
135*868539c2SNatalie Beams       const CeedCopyMode cmode,
136*868539c2SNatalie Beams       const CeedInt *indices,
137*868539c2SNatalie Beams       const CeedElemRestriction res);
1387f5b9731SStan Tomov   #ifdef __cplusplus
1397f5b9731SStan Tomov }
1407f5b9731SStan Tomov   #endif
1417f5b9731SStan Tomov 
// Debug output is compiled out: CeedDebug(...) expands to nothing, so its
// arguments are never evaluated. Swap in the fprintf-based definition on the
// next line to print to stderr instead.
#define CeedDebug(...)
//#define CeedDebug(format, ...) fprintf(stderr, format, ## __VA_ARGS__)

// comment the line below to use the default magma_is_devptr function
#define magma_is_devptr magma_isdevptr
1477f5b9731SStan Tomov 
// batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value>
// The default below applies only when no value was supplied on the compiler
// command line (or by an earlier definition).
#ifndef MAGMA_BATCH_STRIDE
#define MAGMA_BATCH_STRIDE (1000)
#endif
152