// xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision 80a9ef0545a39c00cdcaab1ca26f8053604f3120)
// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.

// magma functions specific to ceed: backend data structs, prototypes of the
// MAGMA kernel routines, and backend object constructors.
#ifndef _ceed_magma_h
#define _ceed_magma_h

#include <stdbool.h>

#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <magma_v2.h>

// Kernel selection mode. Stored in Ceed_Magma::basis_kernel_mode and passed as
// the `kernel_mode` argument of magma_interp, magma_grad and magma_weight.
// NOTE(review): presumably GENERIC selects the *_generic routines and SPECIFIC
// the dimension-specific *_1d/_2d/_3d routines -- confirm in the dispatchers.
typedef enum {
  MAGMA_KERNEL_DIM_GENERIC  = 101,
  MAGMA_KERNEL_DIM_SPECIFIC = 102
} magma_kernel_mode_t;

30e0582403Sabdelfattah83 typedef struct {
31e0582403Sabdelfattah83   magma_kernel_mode_t basis_kernel_mode;
32e0582403Sabdelfattah83   magma_int_t maxthreads[3];
33e0582403Sabdelfattah83   magma_device_t device;
34e0582403Sabdelfattah83   magma_queue_t queue;
35e0582403Sabdelfattah83 } Ceed_Magma;
365a9ca9adSVeselin Dobrev 
377f5b9731SStan Tomov typedef struct {
387f5b9731SStan Tomov   CeedScalar *dqref1d;
397f5b9731SStan Tomov   CeedScalar *dinterp1d;
407f5b9731SStan Tomov   CeedScalar *dgrad1d;
417f5b9731SStan Tomov   CeedScalar *dqweight1d;
427f5b9731SStan Tomov } CeedBasis_Magma;
437f5b9731SStan Tomov 
447f5b9731SStan Tomov typedef struct {
45868539c2SNatalie Beams   CeedScalar *dqref;
46868539c2SNatalie Beams   CeedScalar *dinterp;
47868539c2SNatalie Beams   CeedScalar *dgrad;
48868539c2SNatalie Beams   CeedScalar *dqweight;
49868539c2SNatalie Beams } CeedBasisNonTensor_Magma;
50868539c2SNatalie Beams 
51868539c2SNatalie Beams typedef struct {
52d655899aSNatalie Beams   CeedInt *offsets;
53d655899aSNatalie Beams   CeedInt *doffsets;
54868539c2SNatalie Beams   int  own_;
55868539c2SNatalie Beams   int down_;            // cover a case where we own Device memory
56868539c2SNatalie Beams } CeedElemRestriction_Magma;
57868539c2SNatalie Beams 
58868539c2SNatalie Beams typedef struct {
597f5b9731SStan Tomov   const CeedScalar **inputs;
607f5b9731SStan Tomov   CeedScalar **outputs;
617f5b9731SStan Tomov   bool setupdone;
627f5b9731SStan Tomov } CeedQFunction_Magma;
637f5b9731SStan Tomov 
// Compile-time feature switches, all enabled here.
// NOTE(review): the code that tests these macros is not visible in this
// header; presumably they select batched code paths -- confirm at the use sites.
#define USE_MAGMA_BATCH
#define USE_MAGMA_BATCH2
#define USE_MAGMA_BATCH3
#define USE_MAGMA_BATCH4

// When compiled as C++, wrap the prototypes that follow in a CEED_INTERN { }
// block; in C the prototypes are declared without it.
// NOTE(review): assumes CEED_INTERN expands to an `extern "C"` linkage
// specifier in C++ -- confirm in the ceed headers.
#ifdef __cplusplus
CEED_INTERN {
#endif

73e0582403Sabdelfattah83   magma_int_t magma_interp_1d(
74e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
75e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
76e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
77e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
78e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
79e0582403Sabdelfattah83 
80e0582403Sabdelfattah83   magma_int_t magma_interp_2d(
81e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
82e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
83e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
84e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
85e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
86e0582403Sabdelfattah83 
87e0582403Sabdelfattah83   magma_int_t magma_interp_3d(
88e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
89e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
90e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
91e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
92e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
93e0582403Sabdelfattah83 
94e0582403Sabdelfattah83   magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q,
95868539c2SNatalie Beams                                    magma_int_t dim, magma_int_t ncomp,
96*80a9ef05SNatalie Beams                                    const CeedScalar *dT, CeedTransposeMode tmode,
97*80a9ef05SNatalie Beams                                    const CeedScalar *dU, magma_int_t u_elemstride,
98e0582403Sabdelfattah83                                    magma_int_t cstrdU,
99*80a9ef05SNatalie Beams                                    CeedScalar *dV, magma_int_t v_elemstride,
100e0582403Sabdelfattah83                                    magma_int_t cstrdV,
101e0582403Sabdelfattah83                                    magma_int_t nelem, magma_queue_t queue);
1027f5b9731SStan Tomov 
103e0582403Sabdelfattah83   magma_int_t magma_interp(
104e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q,
105868539c2SNatalie Beams     magma_int_t dim, magma_int_t ncomp,
106*80a9ef05SNatalie Beams     const CeedScalar *dT, CeedTransposeMode tmode,
107*80a9ef05SNatalie Beams     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
108*80a9ef05SNatalie Beams     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
109e0582403Sabdelfattah83     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
1107f5b9731SStan Tomov 
111e0582403Sabdelfattah83   magma_int_t magma_grad_1d(
112e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
113e0582403Sabdelfattah83     const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode,
114e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
115e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
116e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
117868539c2SNatalie Beams 
118e0582403Sabdelfattah83   magma_int_t magma_gradn_2d(
119e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
120e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
121e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
122e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
123e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
124e0582403Sabdelfattah83 
125e0582403Sabdelfattah83   magma_int_t magma_gradt_2d(
126e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
127e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
128e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
129e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
130e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
131e0582403Sabdelfattah83 
132e0582403Sabdelfattah83   magma_int_t magma_gradn_3d(
133e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
134e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
135e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
136e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
137e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
138e0582403Sabdelfattah83 
139e0582403Sabdelfattah83   magma_int_t magma_gradt_3d(
140e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
141e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
142e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
143e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
144e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
145e0582403Sabdelfattah83 
146e0582403Sabdelfattah83   magma_int_t magma_grad_generic(
147e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
148e0582403Sabdelfattah83     const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
149e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
150e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
151e0582403Sabdelfattah83     magma_int_t nelem, magma_queue_t queue);
152e0582403Sabdelfattah83 
153e0582403Sabdelfattah83   magma_int_t magma_grad(
154e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
155e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
156e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU,
157e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV,
158e0582403Sabdelfattah83     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
159e0582403Sabdelfattah83 
160e0582403Sabdelfattah83   magma_int_t magma_weight_1d(
161e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
162e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
163e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
164e0582403Sabdelfattah83 
165e0582403Sabdelfattah83   magma_int_t magma_weight_2d(
166e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
167e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
168e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
169e0582403Sabdelfattah83 
170e0582403Sabdelfattah83   magma_int_t magma_weight_3d(
171e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
172e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
173e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
174e0582403Sabdelfattah83 
175e0582403Sabdelfattah83   magma_int_t magma_weight_generic(
176e0582403Sabdelfattah83     magma_int_t Q, magma_int_t dim,
177e0582403Sabdelfattah83     const CeedScalar *dqweight1d,
178e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t vstride,
179e0582403Sabdelfattah83     magma_int_t nelem, magma_queue_t queue);
180e0582403Sabdelfattah83 
181e0582403Sabdelfattah83   magma_int_t magma_weight(
182e0582403Sabdelfattah83     magma_int_t Q, magma_int_t dim,
183e0582403Sabdelfattah83     const CeedScalar *dqweight1d,
184e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
185e0582403Sabdelfattah83     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
186e0582403Sabdelfattah83 
187e0582403Sabdelfattah83   void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem,
188868539c2SNatalie Beams                               magma_int_t Q,
189*80a9ef05SNatalie Beams                               CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue);
190e0582403Sabdelfattah83 
1918dc8d968Sjeremylt   void magma_readDofsOffset(const magma_int_t NCOMP,
1928dc8d968Sjeremylt                             const magma_int_t compstride,
1938dc8d968Sjeremylt                             const magma_int_t esize, const magma_int_t nelem,
194*80a9ef05SNatalie Beams                             magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv,
195e0582403Sabdelfattah83                             magma_queue_t queue);
196868539c2SNatalie Beams 
1978dc8d968Sjeremylt   void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
198266dd7abSnbeams                              const magma_int_t nelem, magma_int_t *strides,
199*80a9ef05SNatalie Beams                              const CeedScalar *du, CeedScalar *dv,
200e0582403Sabdelfattah83                              magma_queue_t queue);
201266dd7abSnbeams 
2028dc8d968Sjeremylt   void magma_writeDofsOffset(const magma_int_t NCOMP,
2038dc8d968Sjeremylt                              const magma_int_t compstride,
2048dc8d968Sjeremylt                              const magma_int_t esize, const magma_int_t nelem,
205*80a9ef05SNatalie Beams                              magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv,
206e0582403Sabdelfattah83                              magma_queue_t queue);
207868539c2SNatalie Beams 
2088dc8d968Sjeremylt   void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
209266dd7abSnbeams                               const magma_int_t nelem, magma_int_t *strides,
210*80a9ef05SNatalie Beams                               const CeedScalar *du, CeedScalar *dv,
211e0582403Sabdelfattah83                               magma_queue_t queue);
212e0582403Sabdelfattah83 
213e0582403Sabdelfattah83   int magma_dgemm_nontensor(
214e0582403Sabdelfattah83     magma_trans_t transA, magma_trans_t transB,
215e0582403Sabdelfattah83     magma_int_t m, magma_int_t n, magma_int_t k,
216e0582403Sabdelfattah83     double alpha, const double *dA, magma_int_t ldda,
217e0582403Sabdelfattah83     const double *dB, magma_int_t lddb,
218e0582403Sabdelfattah83     double beta,  double *dC, magma_int_t lddc,
219e0582403Sabdelfattah83     magma_queue_t queue );
220e0582403Sabdelfattah83 
221*80a9ef05SNatalie Beams   int magma_sgemm_nontensor(
222*80a9ef05SNatalie Beams     magma_trans_t transA, magma_trans_t transB,
223*80a9ef05SNatalie Beams     magma_int_t m, magma_int_t n, magma_int_t k,
224*80a9ef05SNatalie Beams     float alpha, const float *dA, magma_int_t ldda,
225*80a9ef05SNatalie Beams     const float *dB, magma_int_t lddb,
226*80a9ef05SNatalie Beams     float beta,  float *dC, magma_int_t lddc,
227*80a9ef05SNatalie Beams     magma_queue_t queue );
228*80a9ef05SNatalie Beams 
2297f5b9731SStan Tomov   magma_int_t
2307f5b9731SStan Tomov   magma_isdevptr(const void *A);
2317f5b9731SStan Tomov 
232868539c2SNatalie Beams   int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
233868539c2SNatalie Beams                                     CeedInt Q1d,
234868539c2SNatalie Beams                                     const CeedScalar *interp1d,
235868539c2SNatalie Beams                                     const CeedScalar *grad1d,
236868539c2SNatalie Beams                                     const CeedScalar *qref1d,
237868539c2SNatalie Beams                                     const CeedScalar *qweight1d,
238868539c2SNatalie Beams                                     CeedBasis basis);
2397f5b9731SStan Tomov 
240868539c2SNatalie Beams   int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
241d4f68153Sjeremylt                               CeedInt ndof, CeedInt nqpts,
242d4f68153Sjeremylt                               const CeedScalar *interp,
243d4f68153Sjeremylt                               const CeedScalar *grad,
244d4f68153Sjeremylt                               const CeedScalar *qref,
245d4f68153Sjeremylt                               const CeedScalar *qweight,
246d4f68153Sjeremylt                               CeedBasis basis);
247868539c2SNatalie Beams 
248868539c2SNatalie Beams   int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
249868539c2SNatalie Beams                                       CeedCopyMode cmode,
250d655899aSNatalie Beams                                       const CeedInt *offsets,
251868539c2SNatalie Beams                                       CeedElemRestriction r);
252868539c2SNatalie Beams 
253868539c2SNatalie Beams   int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
254868539c2SNatalie Beams       const CeedCopyMode cmode,
255d655899aSNatalie Beams       const CeedInt *offsets,
256868539c2SNatalie Beams       const CeedElemRestriction res);
257a8c028e3SNatalie Beams 
258a8c028e3SNatalie Beams   int CeedOperatorCreate_Magma(CeedOperator op);
259a8c028e3SNatalie Beams 
2607f5b9731SStan Tomov   #ifdef __cplusplus
2617f5b9731SStan Tomov }
2627f5b9731SStan Tomov   #endif
2637f5b9731SStan Tomov 
// Redirect every use of magma_is_devptr to the magma_isdevptr function
// declared in this header.
// comment the line below to use the default magma_is_devptr function
#define magma_is_devptr magma_isdevptr

// if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync
// should do nothing
// The macro therefore expands to nothing: its arguments are discarded and are
// never evaluated.
#define ceed_magma_queue_sync(...)

// batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value>
// Defaults to 1000 when the build does not define it.
#ifndef MAGMA_BATCH_STRIDE
#define MAGMA_BATCH_STRIDE (1000)
#endif

2763d576824SJeremy L Thompson #endif  // _ceed_magma_h
277