xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision c8b3a6279494317b68c92b36b4b02061d6425d46)
// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
164444f328STzanio 
// MAGMA types and functions specific to the CEED MAGMA backend
18972b3d9dSNatalie Beams #ifndef _ceed_magma_h
193d576824SJeremy L Thompson #define _ceed_magma_h
2090104f39SStan Tomov 
21ec3da8bcSJed Brown #include <ceed/ceed.h>
22ec3da8bcSJed Brown #include <ceed/backend.h>
23e0582403Sabdelfattah83 #include <magma_v2.h>
24e0582403Sabdelfattah83 
/**
 * Kernel-selection mode.
 *
 * Stored in Ceed_Magma.basis_kernel_mode and accepted as the `kernel_mode`
 * argument of the magma_interp(), magma_grad() and magma_weight()
 * dispatchers declared later in this header.
 *
 * NOTE(review): presumably GENERIC routes to the dimension-independent
 * *_generic kernels and SPECIFIC to the *_1d / *_2d / *_3d kernels --
 * confirm against the dispatcher implementations.
 */
typedef enum {
  MAGMA_KERNEL_DIM_GENERIC  = 101,
  MAGMA_KERNEL_DIM_SPECIFIC = 102
} magma_kernel_mode_t;
29e0582403Sabdelfattah83 
30e0582403Sabdelfattah83 typedef struct {
31e0582403Sabdelfattah83   magma_kernel_mode_t basis_kernel_mode;
32e0582403Sabdelfattah83   magma_int_t maxthreads[3];
33e0582403Sabdelfattah83   magma_device_t device;
34e0582403Sabdelfattah83   magma_queue_t queue;
35e0582403Sabdelfattah83 } Ceed_Magma;
365a9ca9adSVeselin Dobrev 
377f5b9731SStan Tomov typedef struct {
387f5b9731SStan Tomov   CeedScalar *dqref1d;
397f5b9731SStan Tomov   CeedScalar *dinterp1d;
407f5b9731SStan Tomov   CeedScalar *dgrad1d;
417f5b9731SStan Tomov   CeedScalar *dqweight1d;
427f5b9731SStan Tomov } CeedBasis_Magma;
437f5b9731SStan Tomov 
447f5b9731SStan Tomov typedef struct {
45868539c2SNatalie Beams   CeedScalar *dqref;
46868539c2SNatalie Beams   CeedScalar *dinterp;
47868539c2SNatalie Beams   CeedScalar *dgrad;
48868539c2SNatalie Beams   CeedScalar *dqweight;
49868539c2SNatalie Beams } CeedBasisNonTensor_Magma;
50868539c2SNatalie Beams 
/**
 * Ownership state recorded in CeedElemRestriction_Magma.own_.
 *
 * The numeric values are spelled out so that a zero-initialised struct
 * reads as OWNED_NONE.
 *
 * NOTE(review): presumably distinguishes "not owned" from "owned, regular
 * host allocation" and "owned, pinned host allocation" so the matching
 * free routine can be chosen -- confirm against the restriction destroy
 * code.
 */
typedef enum {
  OWNED_NONE     = 0,
  OWNED_UNPINNED = 1,
  OWNED_PINNED   = 2
} OwnershipMode;
56*c8b3a627SJed Brown 
57868539c2SNatalie Beams typedef struct {
58d655899aSNatalie Beams   CeedInt *offsets;
59d655899aSNatalie Beams   CeedInt *doffsets;
60*c8b3a627SJed Brown   OwnershipMode own_;
61868539c2SNatalie Beams   int down_;            // cover a case where we own Device memory
62868539c2SNatalie Beams } CeedElemRestriction_Magma;
63868539c2SNatalie Beams 
64868539c2SNatalie Beams typedef struct {
657f5b9731SStan Tomov   const CeedScalar **inputs;
667f5b9731SStan Tomov   CeedScalar **outputs;
677f5b9731SStan Tomov   bool setupdone;
687f5b9731SStan Tomov } CeedQFunction_Magma;
697f5b9731SStan Tomov 
// Feature switches, all enabled (defined with an empty replacement list).
// NOTE(review): nothing in this header tests these macros; they are
// presumably consumed by the backend sources -- confirm before removing any.
#define USE_MAGMA_BATCH
#define USE_MAGMA_BATCH2
#define USE_MAGMA_BATCH3
#define USE_MAGMA_BATCH4
7490104f39SStan Tomov 
757f5b9731SStan Tomov #ifdef __cplusplus
767f5b9731SStan Tomov CEED_INTERN {
777f5b9731SStan Tomov #endif
78e0582403Sabdelfattah83 
79e0582403Sabdelfattah83   magma_int_t magma_interp_1d(
80e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
81e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
82e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
83e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
84e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
85e0582403Sabdelfattah83 
86e0582403Sabdelfattah83   magma_int_t magma_interp_2d(
87e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
88e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
89e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
90e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
91e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
92e0582403Sabdelfattah83 
93e0582403Sabdelfattah83   magma_int_t magma_interp_3d(
94e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
95e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
96e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
97e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
98e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
99e0582403Sabdelfattah83 
100e0582403Sabdelfattah83   magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q,
101868539c2SNatalie Beams                                    magma_int_t dim, magma_int_t ncomp,
10280a9ef05SNatalie Beams                                    const CeedScalar *dT, CeedTransposeMode tmode,
10380a9ef05SNatalie Beams                                    const CeedScalar *dU, magma_int_t u_elemstride,
104e0582403Sabdelfattah83                                    magma_int_t cstrdU,
10580a9ef05SNatalie Beams                                    CeedScalar *dV, magma_int_t v_elemstride,
106e0582403Sabdelfattah83                                    magma_int_t cstrdV,
107e0582403Sabdelfattah83                                    magma_int_t nelem, magma_queue_t queue);
1087f5b9731SStan Tomov 
109e0582403Sabdelfattah83   magma_int_t magma_interp(
110e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q,
111868539c2SNatalie Beams     magma_int_t dim, magma_int_t ncomp,
11280a9ef05SNatalie Beams     const CeedScalar *dT, CeedTransposeMode tmode,
11380a9ef05SNatalie Beams     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
11480a9ef05SNatalie Beams     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
115e0582403Sabdelfattah83     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
1167f5b9731SStan Tomov 
117e0582403Sabdelfattah83   magma_int_t magma_grad_1d(
118e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
119e0582403Sabdelfattah83     const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode,
120e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
121e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
122e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
123868539c2SNatalie Beams 
124e0582403Sabdelfattah83   magma_int_t magma_gradn_2d(
125e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
126e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
127e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
128e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
129e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
130e0582403Sabdelfattah83 
131e0582403Sabdelfattah83   magma_int_t magma_gradt_2d(
132e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
133e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
134e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
135e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
136e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
137e0582403Sabdelfattah83 
138e0582403Sabdelfattah83   magma_int_t magma_gradn_3d(
139e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
140e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
141e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
142e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
143e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
144e0582403Sabdelfattah83 
145e0582403Sabdelfattah83   magma_int_t magma_gradt_3d(
146e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
147e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
148e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
149e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
150e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
151e0582403Sabdelfattah83 
152e0582403Sabdelfattah83   magma_int_t magma_grad_generic(
153e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
154e0582403Sabdelfattah83     const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
155e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
156e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
157e0582403Sabdelfattah83     magma_int_t nelem, magma_queue_t queue);
158e0582403Sabdelfattah83 
159e0582403Sabdelfattah83   magma_int_t magma_grad(
160e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
161e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
162e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU,
163e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV,
164e0582403Sabdelfattah83     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
165e0582403Sabdelfattah83 
166e0582403Sabdelfattah83   magma_int_t magma_weight_1d(
167e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
168e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
169e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
170e0582403Sabdelfattah83 
171e0582403Sabdelfattah83   magma_int_t magma_weight_2d(
172e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
173e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
174e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
175e0582403Sabdelfattah83 
176e0582403Sabdelfattah83   magma_int_t magma_weight_3d(
177e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
178e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
179e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
180e0582403Sabdelfattah83 
181e0582403Sabdelfattah83   magma_int_t magma_weight_generic(
182e0582403Sabdelfattah83     magma_int_t Q, magma_int_t dim,
183e0582403Sabdelfattah83     const CeedScalar *dqweight1d,
184e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t vstride,
185e0582403Sabdelfattah83     magma_int_t nelem, magma_queue_t queue);
186e0582403Sabdelfattah83 
187e0582403Sabdelfattah83   magma_int_t magma_weight(
188e0582403Sabdelfattah83     magma_int_t Q, magma_int_t dim,
189e0582403Sabdelfattah83     const CeedScalar *dqweight1d,
190e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
191e0582403Sabdelfattah83     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
192e0582403Sabdelfattah83 
193e0582403Sabdelfattah83   void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem,
194868539c2SNatalie Beams                               magma_int_t Q,
19580a9ef05SNatalie Beams                               CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue);
196e0582403Sabdelfattah83 
1978dc8d968Sjeremylt   void magma_readDofsOffset(const magma_int_t NCOMP,
1988dc8d968Sjeremylt                             const magma_int_t compstride,
1998dc8d968Sjeremylt                             const magma_int_t esize, const magma_int_t nelem,
20080a9ef05SNatalie Beams                             magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv,
201e0582403Sabdelfattah83                             magma_queue_t queue);
202868539c2SNatalie Beams 
2038dc8d968Sjeremylt   void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
204266dd7abSnbeams                              const magma_int_t nelem, magma_int_t *strides,
20580a9ef05SNatalie Beams                              const CeedScalar *du, CeedScalar *dv,
206e0582403Sabdelfattah83                              magma_queue_t queue);
207266dd7abSnbeams 
2088dc8d968Sjeremylt   void magma_writeDofsOffset(const magma_int_t NCOMP,
2098dc8d968Sjeremylt                              const magma_int_t compstride,
2108dc8d968Sjeremylt                              const magma_int_t esize, const magma_int_t nelem,
21180a9ef05SNatalie Beams                              magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv,
212e0582403Sabdelfattah83                              magma_queue_t queue);
213868539c2SNatalie Beams 
2148dc8d968Sjeremylt   void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
215266dd7abSnbeams                               const magma_int_t nelem, magma_int_t *strides,
21680a9ef05SNatalie Beams                               const CeedScalar *du, CeedScalar *dv,
217e0582403Sabdelfattah83                               magma_queue_t queue);
218e0582403Sabdelfattah83 
219e0582403Sabdelfattah83   int magma_dgemm_nontensor(
220e0582403Sabdelfattah83     magma_trans_t transA, magma_trans_t transB,
221e0582403Sabdelfattah83     magma_int_t m, magma_int_t n, magma_int_t k,
222e0582403Sabdelfattah83     double alpha, const double *dA, magma_int_t ldda,
223e0582403Sabdelfattah83     const double *dB, magma_int_t lddb,
224e0582403Sabdelfattah83     double beta,  double *dC, magma_int_t lddc,
225e0582403Sabdelfattah83     magma_queue_t queue );
226e0582403Sabdelfattah83 
22780a9ef05SNatalie Beams   int magma_sgemm_nontensor(
22880a9ef05SNatalie Beams     magma_trans_t transA, magma_trans_t transB,
22980a9ef05SNatalie Beams     magma_int_t m, magma_int_t n, magma_int_t k,
23080a9ef05SNatalie Beams     float alpha, const float *dA, magma_int_t ldda,
23180a9ef05SNatalie Beams     const float *dB, magma_int_t lddb,
23280a9ef05SNatalie Beams     float beta,  float *dC, magma_int_t lddc,
23380a9ef05SNatalie Beams     magma_queue_t queue );
23480a9ef05SNatalie Beams 
2357f5b9731SStan Tomov   magma_int_t
2367f5b9731SStan Tomov   magma_isdevptr(const void *A);
2377f5b9731SStan Tomov 
238868539c2SNatalie Beams   int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
239868539c2SNatalie Beams                                     CeedInt Q1d,
240868539c2SNatalie Beams                                     const CeedScalar *interp1d,
241868539c2SNatalie Beams                                     const CeedScalar *grad1d,
242868539c2SNatalie Beams                                     const CeedScalar *qref1d,
243868539c2SNatalie Beams                                     const CeedScalar *qweight1d,
244868539c2SNatalie Beams                                     CeedBasis basis);
2457f5b9731SStan Tomov 
246868539c2SNatalie Beams   int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
247d4f68153Sjeremylt                               CeedInt ndof, CeedInt nqpts,
248d4f68153Sjeremylt                               const CeedScalar *interp,
249d4f68153Sjeremylt                               const CeedScalar *grad,
250d4f68153Sjeremylt                               const CeedScalar *qref,
251d4f68153Sjeremylt                               const CeedScalar *qweight,
252d4f68153Sjeremylt                               CeedBasis basis);
253868539c2SNatalie Beams 
254868539c2SNatalie Beams   int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
255868539c2SNatalie Beams                                       CeedCopyMode cmode,
256d655899aSNatalie Beams                                       const CeedInt *offsets,
257868539c2SNatalie Beams                                       CeedElemRestriction r);
258868539c2SNatalie Beams 
259868539c2SNatalie Beams   int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
260868539c2SNatalie Beams       const CeedCopyMode cmode,
261d655899aSNatalie Beams       const CeedInt *offsets,
262868539c2SNatalie Beams       const CeedElemRestriction res);
263a8c028e3SNatalie Beams 
264a8c028e3SNatalie Beams   int CeedOperatorCreate_Magma(CeedOperator op);
265a8c028e3SNatalie Beams 
2667f5b9731SStan Tomov   #ifdef __cplusplus
2677f5b9731SStan Tomov }
2687f5b9731SStan Tomov   #endif
2697f5b9731SStan Tomov 
// Redirect magma_is_devptr to the backend's own magma_isdevptr.
// Comment out the line below to use MAGMA's default magma_is_devptr instead.
#define magma_is_devptr magma_isdevptr

// If MAGMA and cuda/ref are both using the null stream, then
// ceed_magma_queue_sync should do nothing: it expands to nothing and its
// arguments are discarded (never evaluated).
#define ceed_magma_queue_sync(...)

// Batch stride; override at build time with
// -DMAGMA_BATCH_STRIDE=<desired-value>.
#ifndef MAGMA_BATCH_STRIDE
#define MAGMA_BATCH_STRIDE (1000)
#endif
281e0582403Sabdelfattah83 
2823d576824SJeremy L Thompson #endif  // _ceed_magma_h
283