xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision f71aa81bd7d2e9c6555cba4570cf145ac8d1aa26)
14444f328STzanio // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
24444f328STzanio // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
34444f328STzanio // reserved. See files LICENSE and NOTICE for details.
44444f328STzanio //
54444f328STzanio // This file is part of CEED, a collection of benchmarks, miniapps, software
64444f328STzanio // libraries and APIs for efficient high-order finite element and spectral
74444f328STzanio // element discretizations for exascale applications. For more information and
84444f328STzanio // source code availability see http://github.com/ceed.
94444f328STzanio //
104444f328STzanio // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
114444f328STzanio // a collaborative effort of two U.S. Department of Energy organizations (Office
124444f328STzanio // of Science and the National Nuclear Security Administration) responsible for
134444f328STzanio // the planning and preparation of a capable exascale ecosystem, including
144444f328STzanio // software, applications, hardware, advanced system engineering and early
154444f328STzanio // testbed platforms, in support of the nation's exascale computing imperative.
164444f328STzanio 
1790104f39SStan Tomov // magma functions specific to ceed
18972b3d9dSNatalie Beams #ifndef _ceed_magma_h
193d576824SJeremy L Thompson #define _ceed_magma_h
2090104f39SStan Tomov 
21ec3da8bcSJed Brown #include <ceed/ceed.h>
22ec3da8bcSJed Brown #include <ceed/backend.h>
23e0582403Sabdelfattah83 #include <magma_v2.h>
24e0582403Sabdelfattah83 
// Selector for the family of basis kernels to launch. The enumerators are
// explicitly numbered 101 and 102.
typedef enum {
  // presumably: the *_generic kernels, which take `dim` as an argument -- TODO confirm
  MAGMA_KERNEL_DIM_GENERIC  = 101,
  // presumably: the dimension-specialised *_1d/_2d/_3d kernels -- TODO confirm
  MAGMA_KERNEL_DIM_SPECIFIC = 102
} magma_kernel_mode_t;
29e0582403Sabdelfattah83 
30e0582403Sabdelfattah83 typedef struct {
31e0582403Sabdelfattah83   magma_kernel_mode_t basis_kernel_mode;
32e0582403Sabdelfattah83   magma_device_t device;
33e0582403Sabdelfattah83   magma_queue_t queue;
34e0582403Sabdelfattah83 } Ceed_Magma;
355a9ca9adSVeselin Dobrev 
367f5b9731SStan Tomov typedef struct {
377f5b9731SStan Tomov   CeedScalar *dqref1d;
387f5b9731SStan Tomov   CeedScalar *dinterp1d;
397f5b9731SStan Tomov   CeedScalar *dgrad1d;
407f5b9731SStan Tomov   CeedScalar *dqweight1d;
417f5b9731SStan Tomov } CeedBasis_Magma;
427f5b9731SStan Tomov 
437f5b9731SStan Tomov typedef struct {
44868539c2SNatalie Beams   CeedScalar *dqref;
45868539c2SNatalie Beams   CeedScalar *dinterp;
46868539c2SNatalie Beams   CeedScalar *dgrad;
47868539c2SNatalie Beams   CeedScalar *dqweight;
48868539c2SNatalie Beams } CeedBasisNonTensor_Magma;
49868539c2SNatalie Beams 
50868539c2SNatalie Beams typedef struct {
51d655899aSNatalie Beams   CeedInt *offsets;
52d655899aSNatalie Beams   CeedInt *doffsets;
53868539c2SNatalie Beams   int  own_;
54868539c2SNatalie Beams   int down_;            // cover a case where we own Device memory
55868539c2SNatalie Beams } CeedElemRestriction_Magma;
56868539c2SNatalie Beams 
57868539c2SNatalie Beams typedef struct {
587f5b9731SStan Tomov   const CeedScalar **inputs;
597f5b9731SStan Tomov   CeedScalar **outputs;
607f5b9731SStan Tomov   bool setupdone;
617f5b9731SStan Tomov } CeedQFunction_Magma;
627f5b9731SStan Tomov 
// Compile-time feature switches, all defined (with empty replacement text).
// NOTE(review): what each one enables is not visible in this header -- check
// the backend sources that test them with #ifdef.
#define USE_MAGMA_BATCH
#define USE_MAGMA_BATCH2
#define USE_MAGMA_BATCH3
#define USE_MAGMA_BATCH4
6790104f39SStan Tomov 
687f5b9731SStan Tomov #ifdef __cplusplus
697f5b9731SStan Tomov CEED_INTERN {
707f5b9731SStan Tomov #endif
71e0582403Sabdelfattah83 
72e0582403Sabdelfattah83   magma_int_t magma_interp_1d(
73e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
74e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
75e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
76e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
77*f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
78e0582403Sabdelfattah83 
79e0582403Sabdelfattah83   magma_int_t magma_interp_2d(
80e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
81e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
82e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
83e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
84*f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
85e0582403Sabdelfattah83 
86e0582403Sabdelfattah83   magma_int_t magma_interp_3d(
87e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
88e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
89e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
90e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
91*f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
92e0582403Sabdelfattah83 
93e0582403Sabdelfattah83   magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q,
94868539c2SNatalie Beams                                    magma_int_t dim, magma_int_t ncomp,
9580a9ef05SNatalie Beams                                    const CeedScalar *dT, CeedTransposeMode tmode,
9680a9ef05SNatalie Beams                                    const CeedScalar *dU, magma_int_t u_elemstride,
97e0582403Sabdelfattah83                                    magma_int_t cstrdU,
9880a9ef05SNatalie Beams                                    CeedScalar *dV, magma_int_t v_elemstride,
99e0582403Sabdelfattah83                                    magma_int_t cstrdV,
100e0582403Sabdelfattah83                                    magma_int_t nelem, magma_queue_t queue);
1017f5b9731SStan Tomov 
102e0582403Sabdelfattah83   magma_int_t magma_interp(
103e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q,
104868539c2SNatalie Beams     magma_int_t dim, magma_int_t ncomp,
10580a9ef05SNatalie Beams     const CeedScalar *dT, CeedTransposeMode tmode,
10680a9ef05SNatalie Beams     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
10780a9ef05SNatalie Beams     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
108*f71aa81bSnbeams     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);
1097f5b9731SStan Tomov 
110e0582403Sabdelfattah83   magma_int_t magma_grad_1d(
111e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
112e0582403Sabdelfattah83     const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode,
113e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
114e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
115*f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
116868539c2SNatalie Beams 
117e0582403Sabdelfattah83   magma_int_t magma_gradn_2d(
118e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
119e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
120e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
121e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
122*f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
123e0582403Sabdelfattah83 
124e0582403Sabdelfattah83   magma_int_t magma_gradt_2d(
125e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
126e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
127e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
128e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
129*f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
130e0582403Sabdelfattah83 
131e0582403Sabdelfattah83   magma_int_t magma_gradn_3d(
132e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
133e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
134e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
135e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
136*f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
137e0582403Sabdelfattah83 
138e0582403Sabdelfattah83   magma_int_t magma_gradt_3d(
139e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
140e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
141e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
142e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
143*f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
144e0582403Sabdelfattah83 
145e0582403Sabdelfattah83   magma_int_t magma_grad_generic(
146e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
147e0582403Sabdelfattah83     const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
148e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
149e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
150e0582403Sabdelfattah83     magma_int_t nelem, magma_queue_t queue);
151e0582403Sabdelfattah83 
152e0582403Sabdelfattah83   magma_int_t magma_grad(
153e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
154e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
155e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU,
156e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV,
157*f71aa81bSnbeams     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);
158e0582403Sabdelfattah83 
159e0582403Sabdelfattah83   magma_int_t magma_weight_1d(
160e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
161e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
162*f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
163e0582403Sabdelfattah83 
164e0582403Sabdelfattah83   magma_int_t magma_weight_2d(
165e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
166e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
167*f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
168e0582403Sabdelfattah83 
169e0582403Sabdelfattah83   magma_int_t magma_weight_3d(
170e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
171e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
172*f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
173e0582403Sabdelfattah83 
174e0582403Sabdelfattah83   magma_int_t magma_weight_generic(
175e0582403Sabdelfattah83     magma_int_t Q, magma_int_t dim,
176e0582403Sabdelfattah83     const CeedScalar *dqweight1d,
177e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t vstride,
178e0582403Sabdelfattah83     magma_int_t nelem, magma_queue_t queue);
179e0582403Sabdelfattah83 
180e0582403Sabdelfattah83   magma_int_t magma_weight(
181e0582403Sabdelfattah83     magma_int_t Q, magma_int_t dim,
182e0582403Sabdelfattah83     const CeedScalar *dqweight1d,
183e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
184*f71aa81bSnbeams     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);
185e0582403Sabdelfattah83 
186e0582403Sabdelfattah83   void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem,
187868539c2SNatalie Beams                               magma_int_t Q,
18880a9ef05SNatalie Beams                               CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue);
189e0582403Sabdelfattah83 
1908dc8d968Sjeremylt   void magma_readDofsOffset(const magma_int_t NCOMP,
1918dc8d968Sjeremylt                             const magma_int_t compstride,
1928dc8d968Sjeremylt                             const magma_int_t esize, const magma_int_t nelem,
19380a9ef05SNatalie Beams                             magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv,
194e0582403Sabdelfattah83                             magma_queue_t queue);
195868539c2SNatalie Beams 
1968dc8d968Sjeremylt   void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
197266dd7abSnbeams                              const magma_int_t nelem, magma_int_t *strides,
19880a9ef05SNatalie Beams                              const CeedScalar *du, CeedScalar *dv,
199e0582403Sabdelfattah83                              magma_queue_t queue);
200266dd7abSnbeams 
2018dc8d968Sjeremylt   void magma_writeDofsOffset(const magma_int_t NCOMP,
2028dc8d968Sjeremylt                              const magma_int_t compstride,
2038dc8d968Sjeremylt                              const magma_int_t esize, const magma_int_t nelem,
20480a9ef05SNatalie Beams                              magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv,
205e0582403Sabdelfattah83                              magma_queue_t queue);
206868539c2SNatalie Beams 
2078dc8d968Sjeremylt   void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
208266dd7abSnbeams                               const magma_int_t nelem, magma_int_t *strides,
20980a9ef05SNatalie Beams                               const CeedScalar *du, CeedScalar *dv,
210e0582403Sabdelfattah83                               magma_queue_t queue);
211e0582403Sabdelfattah83 
212e0582403Sabdelfattah83   int magma_dgemm_nontensor(
213e0582403Sabdelfattah83     magma_trans_t transA, magma_trans_t transB,
214e0582403Sabdelfattah83     magma_int_t m, magma_int_t n, magma_int_t k,
215e0582403Sabdelfattah83     double alpha, const double *dA, magma_int_t ldda,
216e0582403Sabdelfattah83     const double *dB, magma_int_t lddb,
217e0582403Sabdelfattah83     double beta,  double *dC, magma_int_t lddc,
218e0582403Sabdelfattah83     magma_queue_t queue );
219e0582403Sabdelfattah83 
22080a9ef05SNatalie Beams   int magma_sgemm_nontensor(
22180a9ef05SNatalie Beams     magma_trans_t transA, magma_trans_t transB,
22280a9ef05SNatalie Beams     magma_int_t m, magma_int_t n, magma_int_t k,
22380a9ef05SNatalie Beams     float alpha, const float *dA, magma_int_t ldda,
22480a9ef05SNatalie Beams     const float *dB, magma_int_t lddb,
22580a9ef05SNatalie Beams     float beta,  float *dC, magma_int_t lddc,
22680a9ef05SNatalie Beams     magma_queue_t queue );
22780a9ef05SNatalie Beams 
2287f5b9731SStan Tomov   magma_int_t
2297f5b9731SStan Tomov   magma_isdevptr(const void *A);
2307f5b9731SStan Tomov 
231868539c2SNatalie Beams   int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
232868539c2SNatalie Beams                                     CeedInt Q1d,
233868539c2SNatalie Beams                                     const CeedScalar *interp1d,
234868539c2SNatalie Beams                                     const CeedScalar *grad1d,
235868539c2SNatalie Beams                                     const CeedScalar *qref1d,
236868539c2SNatalie Beams                                     const CeedScalar *qweight1d,
237868539c2SNatalie Beams                                     CeedBasis basis);
2387f5b9731SStan Tomov 
239868539c2SNatalie Beams   int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
240d4f68153Sjeremylt                               CeedInt ndof, CeedInt nqpts,
241d4f68153Sjeremylt                               const CeedScalar *interp,
242d4f68153Sjeremylt                               const CeedScalar *grad,
243d4f68153Sjeremylt                               const CeedScalar *qref,
244d4f68153Sjeremylt                               const CeedScalar *qweight,
245d4f68153Sjeremylt                               CeedBasis basis);
246868539c2SNatalie Beams 
247868539c2SNatalie Beams   int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
248868539c2SNatalie Beams                                       CeedCopyMode cmode,
249d655899aSNatalie Beams                                       const CeedInt *offsets,
250868539c2SNatalie Beams                                       CeedElemRestriction r);
251868539c2SNatalie Beams 
252868539c2SNatalie Beams   int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
253868539c2SNatalie Beams       const CeedCopyMode cmode,
254d655899aSNatalie Beams       const CeedInt *offsets,
255868539c2SNatalie Beams       const CeedElemRestriction res);
256a8c028e3SNatalie Beams 
257a8c028e3SNatalie Beams   int CeedOperatorCreate_Magma(CeedOperator op);
258a8c028e3SNatalie Beams 
2597f5b9731SStan Tomov   #ifdef __cplusplus
2607f5b9731SStan Tomov }
2617f5b9731SStan Tomov   #endif
2627f5b9731SStan Tomov 
// Redirect every use of magma_is_devptr to magma_isdevptr. Comment out the
// line below to fall back to the default magma_is_devptr function.
#define magma_is_devptr magma_isdevptr
2657f5b9731SStan Tomov 
// If MAGMA and cuda/ref are both using the null stream, then
// ceed_magma_queue_sync should do nothing.
//
// The macro discards its arguments without evaluating them and expands to a
// void expression rather than to nothing, so that an unbraced
// `if (c) ceed_magma_queue_sync(q);` does not leave an empty body behind
// (which compilers flag with -Wempty-body). Existing statement-style calls
// `ceed_magma_queue_sync(q);` compile exactly as before.
#define ceed_magma_queue_sync(...) ((void)0)
269e0582403Sabdelfattah83 
// Batch stride. Defaults to 1000 unless the build already supplies a value,
// e.g. with -DMAGMA_BATCH_STRIDE=<desired-value>.
#if !defined(MAGMA_BATCH_STRIDE)
#  define MAGMA_BATCH_STRIDE (1000)
#endif
274e0582403Sabdelfattah83 
2753d576824SJeremy L Thompson #endif  // _ceed_magma_h
276