// xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision e0582403bfe43d7f8c95d9630d18b40290e7945c)
// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.

// MAGMA functions and backend data types specific to CEED
#ifndef CEED_MAGMA_H
#define CEED_MAGMA_H

#include <stdbool.h>
#include <string.h>
#include <ceed-backend.h>
#include <magma_v2.h>

// Kernel-selection mode. Values of this type are stored in
// Ceed_Magma::basis_kernel_mode and passed as the `kernel_mode` argument of
// magma_interp(), magma_grad() and magma_weight().
// NOTE(review): the names suggest "dimension-generic" vs. "dimension-specific"
// kernels — confirm against the kernel dispatch code.
typedef enum {
  MAGMA_KERNEL_DIM_GENERIC  = 101,
  MAGMA_KERNEL_DIM_SPECIFIC = 102
} magma_kernel_mode_t;

30*e0582403Sabdelfattah83 typedef struct {
31*e0582403Sabdelfattah83   magma_kernel_mode_t basis_kernel_mode;
32*e0582403Sabdelfattah83   magma_int_t maxthreads[3];
33*e0582403Sabdelfattah83   magma_device_t device;
34*e0582403Sabdelfattah83   magma_queue_t queue;
35*e0582403Sabdelfattah83 } Ceed_Magma;
365a9ca9adSVeselin Dobrev 
377f5b9731SStan Tomov typedef struct {
387f5b9731SStan Tomov   CeedScalar *dqref1d;
397f5b9731SStan Tomov   CeedScalar *dinterp1d;
407f5b9731SStan Tomov   CeedScalar *dgrad1d;
417f5b9731SStan Tomov   CeedScalar *dqweight1d;
427f5b9731SStan Tomov } CeedBasis_Magma;
437f5b9731SStan Tomov 
447f5b9731SStan Tomov typedef struct {
45868539c2SNatalie Beams   CeedScalar *dqref;
46868539c2SNatalie Beams   CeedScalar *dinterp;
47868539c2SNatalie Beams   CeedScalar *dgrad;
48868539c2SNatalie Beams   CeedScalar *dqweight;
49868539c2SNatalie Beams } CeedBasisNonTensor_Magma;
50868539c2SNatalie Beams 
51868539c2SNatalie Beams typedef struct {
52d655899aSNatalie Beams   CeedInt *offsets;
53d655899aSNatalie Beams   CeedInt *doffsets;
54868539c2SNatalie Beams   int  own_;
55868539c2SNatalie Beams   int down_;            // cover a case where we own Device memory
56868539c2SNatalie Beams } CeedElemRestriction_Magma;
57868539c2SNatalie Beams 
58868539c2SNatalie Beams typedef struct {
597f5b9731SStan Tomov   const CeedScalar **inputs;
607f5b9731SStan Tomov   CeedScalar **outputs;
617f5b9731SStan Tomov   bool setupdone;
627f5b9731SStan Tomov } CeedQFunction_Magma;
637f5b9731SStan Tomov 
// Compile-time feature switches, all enabled unconditionally here.
// NOTE(review): what each switch selects is decided by the sources that test
// these macros, not by this header.
#define USE_MAGMA_BATCH
#define USE_MAGMA_BATCH2
#define USE_MAGMA_BATCH3
#define USE_MAGMA_BATCH4

697f5b9731SStan Tomov #ifdef __cplusplus
707f5b9731SStan Tomov CEED_INTERN {
717f5b9731SStan Tomov #endif
72*e0582403Sabdelfattah83 
73*e0582403Sabdelfattah83   magma_int_t magma_interp_1d(
74*e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
75*e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
76*e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
77*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
78*e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
79*e0582403Sabdelfattah83 
80*e0582403Sabdelfattah83   magma_int_t magma_interp_2d(
81*e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
82*e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
83*e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
84*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
85*e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
86*e0582403Sabdelfattah83 
87*e0582403Sabdelfattah83   magma_int_t magma_interp_3d(
88*e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
89*e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
90*e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
91*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
92*e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
93*e0582403Sabdelfattah83 
94*e0582403Sabdelfattah83   magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q,
95868539c2SNatalie Beams                                    magma_int_t dim, magma_int_t ncomp,
96868539c2SNatalie Beams                                    const double *dT, CeedTransposeMode tmode,
97868539c2SNatalie Beams                                    const double *dU, magma_int_t u_elemstride,
98*e0582403Sabdelfattah83                                    magma_int_t cstrdU,
99868539c2SNatalie Beams                                    double *dV, magma_int_t v_elemstride,
100*e0582403Sabdelfattah83                                    magma_int_t cstrdV,
101*e0582403Sabdelfattah83                                    magma_int_t nelem, magma_queue_t queue);
1027f5b9731SStan Tomov 
103*e0582403Sabdelfattah83   magma_int_t magma_interp(
104*e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q,
105868539c2SNatalie Beams     magma_int_t dim, magma_int_t ncomp,
106*e0582403Sabdelfattah83     const double *dT, CeedTransposeMode tmode,
107*e0582403Sabdelfattah83     const double *dU, magma_int_t estrdU, magma_int_t cstrdU,
108*e0582403Sabdelfattah83     double *dV, magma_int_t estrdV, magma_int_t cstrdV,
109*e0582403Sabdelfattah83     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
1107f5b9731SStan Tomov 
111*e0582403Sabdelfattah83   magma_int_t magma_grad_1d(
112*e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
113*e0582403Sabdelfattah83     const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode,
114*e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
115*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
116*e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
117868539c2SNatalie Beams 
118*e0582403Sabdelfattah83   magma_int_t magma_gradn_2d(
119*e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
120*e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
121*e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
122*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
123*e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
124*e0582403Sabdelfattah83 
125*e0582403Sabdelfattah83   magma_int_t magma_gradt_2d(
126*e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
127*e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
128*e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
129*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
130*e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
131*e0582403Sabdelfattah83 
132*e0582403Sabdelfattah83   magma_int_t magma_gradn_3d(
133*e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
134*e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
135*e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
136*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
137*e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
138*e0582403Sabdelfattah83 
139*e0582403Sabdelfattah83   magma_int_t magma_gradt_3d(
140*e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
141*e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
142*e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
143*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
144*e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
145*e0582403Sabdelfattah83 
146*e0582403Sabdelfattah83   magma_int_t magma_grad_generic(
147*e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
148*e0582403Sabdelfattah83     const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
149*e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
150*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
151*e0582403Sabdelfattah83     magma_int_t nelem, magma_queue_t queue);
152*e0582403Sabdelfattah83 
153*e0582403Sabdelfattah83   magma_int_t magma_grad(
154*e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
155*e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
156*e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU,
157*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV,
158*e0582403Sabdelfattah83     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
159*e0582403Sabdelfattah83 
160*e0582403Sabdelfattah83   magma_int_t magma_weight_1d(
161*e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
162*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
163*e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
164*e0582403Sabdelfattah83 
165*e0582403Sabdelfattah83   magma_int_t magma_weight_2d(
166*e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
167*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
168*e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
169*e0582403Sabdelfattah83 
170*e0582403Sabdelfattah83   magma_int_t magma_weight_3d(
171*e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
172*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
173*e0582403Sabdelfattah83     magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);
174*e0582403Sabdelfattah83 
175*e0582403Sabdelfattah83   magma_int_t magma_weight_generic(
176*e0582403Sabdelfattah83     magma_int_t Q, magma_int_t dim,
177*e0582403Sabdelfattah83     const CeedScalar *dqweight1d,
178*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t vstride,
179*e0582403Sabdelfattah83     magma_int_t nelem, magma_queue_t queue);
180*e0582403Sabdelfattah83 
181*e0582403Sabdelfattah83   magma_int_t magma_weight(
182*e0582403Sabdelfattah83     magma_int_t Q, magma_int_t dim,
183*e0582403Sabdelfattah83     const CeedScalar *dqweight1d,
184*e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
185*e0582403Sabdelfattah83     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
186*e0582403Sabdelfattah83 
187*e0582403Sabdelfattah83   void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem,
188868539c2SNatalie Beams                               magma_int_t Q,
189*e0582403Sabdelfattah83                               double *dqweight, double *dv, magma_queue_t queue);
190*e0582403Sabdelfattah83 
191868539c2SNatalie Beams 
1928dc8d968Sjeremylt   void magma_readDofsOffset(const magma_int_t NCOMP,
1938dc8d968Sjeremylt                             const magma_int_t compstride,
1948dc8d968Sjeremylt                             const magma_int_t esize, const magma_int_t nelem,
195*e0582403Sabdelfattah83                             magma_int_t *offsets, const double *du, double *dv,
196*e0582403Sabdelfattah83                             magma_queue_t queue);
197868539c2SNatalie Beams 
1988dc8d968Sjeremylt   void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
199266dd7abSnbeams                              const magma_int_t nelem, magma_int_t *strides,
200*e0582403Sabdelfattah83                              const double *du, double *dv,
201*e0582403Sabdelfattah83                              magma_queue_t queue);
202266dd7abSnbeams 
2038dc8d968Sjeremylt   void magma_writeDofsOffset(const magma_int_t NCOMP,
2048dc8d968Sjeremylt                              const magma_int_t compstride,
2058dc8d968Sjeremylt                              const magma_int_t esize, const magma_int_t nelem,
206*e0582403Sabdelfattah83                              magma_int_t *offsets,const double *du, double *dv,
207*e0582403Sabdelfattah83                              magma_queue_t queue);
208868539c2SNatalie Beams 
2098dc8d968Sjeremylt   void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
210266dd7abSnbeams                               const magma_int_t nelem, magma_int_t *strides,
211*e0582403Sabdelfattah83                               const double *du, double *dv,
212*e0582403Sabdelfattah83                               magma_queue_t queue);
213*e0582403Sabdelfattah83 
214*e0582403Sabdelfattah83   int magma_dgemm_nontensor(
215*e0582403Sabdelfattah83     magma_trans_t transA, magma_trans_t transB,
216*e0582403Sabdelfattah83     magma_int_t m, magma_int_t n, magma_int_t k,
217*e0582403Sabdelfattah83     double alpha, const double *dA, magma_int_t ldda,
218*e0582403Sabdelfattah83     const double *dB, magma_int_t lddb,
219*e0582403Sabdelfattah83     double beta,  double *dC, magma_int_t lddc,
220*e0582403Sabdelfattah83     magma_queue_t queue );
221*e0582403Sabdelfattah83 
222266dd7abSnbeams 
2237f5b9731SStan Tomov   magma_int_t
2247f5b9731SStan Tomov   magma_isdevptr(const void *A);
2257f5b9731SStan Tomov 
226868539c2SNatalie Beams   int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
227868539c2SNatalie Beams                                     CeedInt Q1d,
228868539c2SNatalie Beams                                     const CeedScalar *interp1d,
229868539c2SNatalie Beams                                     const CeedScalar *grad1d,
230868539c2SNatalie Beams                                     const CeedScalar *qref1d,
231868539c2SNatalie Beams                                     const CeedScalar *qweight1d,
232868539c2SNatalie Beams                                     CeedBasis basis);
2337f5b9731SStan Tomov 
234868539c2SNatalie Beams   int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
235d4f68153Sjeremylt                               CeedInt ndof, CeedInt nqpts,
236d4f68153Sjeremylt                               const CeedScalar *interp,
237d4f68153Sjeremylt                               const CeedScalar *grad,
238d4f68153Sjeremylt                               const CeedScalar *qref,
239d4f68153Sjeremylt                               const CeedScalar *qweight,
240d4f68153Sjeremylt                               CeedBasis basis);
241868539c2SNatalie Beams 
242868539c2SNatalie Beams   int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
243868539c2SNatalie Beams                                       CeedCopyMode cmode,
244d655899aSNatalie Beams                                       const CeedInt *offsets,
245868539c2SNatalie Beams                                       CeedElemRestriction r);
246868539c2SNatalie Beams 
247868539c2SNatalie Beams   int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
248868539c2SNatalie Beams       const CeedCopyMode cmode,
249d655899aSNatalie Beams       const CeedInt *offsets,
250868539c2SNatalie Beams       const CeedElemRestriction res);
251a8c028e3SNatalie Beams 
252a8c028e3SNatalie Beams   int CeedOperatorCreate_Magma(CeedOperator op);
253a8c028e3SNatalie Beams 
2547f5b9731SStan Tomov   #ifdef __cplusplus
2557f5b9731SStan Tomov }
2567f5b9731SStan Tomov   #endif
2577f5b9731SStan Tomov 
// Debug output is compiled out for this backend: every CeedDebug(...) call
// expands to nothing, so its arguments are never evaluated.
// The #undef avoids a macro-redefinition diagnostic should an included header
// already define CeedDebug (NOTE(review): confirm whether ceed-backend.h does).
#undef CeedDebug
#define CeedDebug(...)
// To enable debug output, replace the definition above with:
//#define CeedDebug(format, ...) fprintf(stderr, format, ## __VA_ARGS__)

// comment the line below to use the default magma_is_devptr function
#define magma_is_devptr magma_isdevptr

// if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync
// should do nothing
#define ceed_magma_queue_sync(...)

// batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value>
#ifndef MAGMA_BATCH_STRIDE
#define MAGMA_BATCH_STRIDE (1000)
#endif

#endif  // CEED_MAGMA_H