xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision f99981a39dfa8f2347c0af70b34eb4868a5e9d95)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
// magma functions specific to ceed
// NOTE(review): the guard name begins with an underscore followed by a
// lowercase letter, which is reserved for the implementation at file scope
// (C11 7.1.3) -- consider renaming to CEED_MAGMA_H.
#ifndef _ceed_magma_h
#define _ceed_magma_h
20 
21 #include <ceed/ceed.h>
22 #include <ceed/backend.h>
23 #include <magma_v2.h>
24 
// Selects which family of basis kernels the backend launches; consumed by the
// magma_interp/magma_grad/magma_weight dispatchers declared below.
typedef enum {
  // Use the dimension-generic kernels (magma_*_generic) for any dim
  MAGMA_KERNEL_DIM_GENERIC=101,
  // Use the kernels specialized per dimension (magma_*_1d/_2d/_3d)
  MAGMA_KERNEL_DIM_SPECIFIC=102
} magma_kernel_mode_t;
29 
// Backend data attached to a Ceed object: kernel-selection mode plus the
// MAGMA device/queue that backend operations are issued on.
typedef struct {
  magma_kernel_mode_t basis_kernel_mode;  // generic vs. dimension-specific basis kernels
  magma_device_t device;                  // MAGMA device handle
  magma_queue_t queue;                    // MAGMA execution queue
} Ceed_Magma;
35 
// Tensor-product basis data.  The "d" prefix suggests these are device-side
// copies of the 1D basis arrays -- TODO confirm against the .c implementation.
typedef struct {
  CeedScalar *dqref1d;     // 1D quadrature point locations
  CeedScalar *dinterp1d;   // 1D interpolation matrix
  CeedScalar *dgrad1d;     // 1D differentiation matrix
  CeedScalar *dqweight1d;  // 1D quadrature weights
} CeedBasis_Magma;
42 
// Non-tensor (H1) basis data: full matrices rather than 1D factors.
// "d" prefix presumably means device memory -- TODO confirm.
typedef struct {
  CeedScalar *dqref;     // quadrature point locations
  CeedScalar *dinterp;   // interpolation matrix
  CeedScalar *dgrad;     // gradient matrix
  CeedScalar *dqweight;  // quadrature weights
} CeedBasisNonTensor_Magma;
49 
// Records whether the backend owns an array it holds, and (by the names)
// whether the owned allocation is pinned -- semantics to be confirmed in
// the restriction implementation.
typedef enum {
  OWNED_NONE = 0,   // memory is borrowed; backend must not free it
  OWNED_UNPINNED,   // owned, regular (pageable) allocation
  OWNED_PINNED,     // owned, pinned (page-locked) allocation
} OwnershipMode;
55 
// Element restriction data: host and device ("d" prefix, presumably) copies
// of the element-to-DOF offsets, plus ownership tracking for each.
typedef struct {
  CeedInt *offsets;     // offsets array (host copy -- TODO confirm)
  CeedInt *doffsets;    // offsets array (device copy -- TODO confirm)
  OwnershipMode own_;   // ownership of the offsets memory
  int down_;            // cover a case where we own Device memory
} CeedElemRestriction_Magma;
62 
// QFunction data: pointer arrays for the input/output fields and a flag so
// setup runs only once.
typedef struct {
  const CeedScalar **inputs;   // per-field input data pointers
  CeedScalar **outputs;        // per-field output data pointers
  bool setupdone;              // true after one-time setup has completed
} CeedQFunction_Magma;
68 
// Feature toggles for the MAGMA batched code paths; presumably tested with
// #ifdef in the backend sources -- confirm before removing any of them.
#define USE_MAGMA_BATCH
#define USE_MAGMA_BATCH2
#define USE_MAGMA_BATCH3
#define USE_MAGMA_BATCH4
73 
74 #ifdef __cplusplus
75 CEED_INTERN {
76 #endif
77 
  // Tensor-basis interpolation kernels.
  //   P, Q            number of 1D basis nodes / quadrature points
  //   ncomp           number of field components
  //   dT              1D transfer (interpolation) matrix
  //   tmode           apply dT or its transpose (direction to be confirmed)
  //   dU, dV          input / output data
  //   estrd*, cstrd*  element and component strides of dU/dV
  //   nelem           number of elements in the batch
  //   queue           MAGMA queue the kernel is launched on
  // The _1d/_2d/_3d variants are specialized per dimension; _generic handles
  // any dim; magma_interp dispatches between them based on kernel_mode.
  magma_int_t magma_interp_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_queue_t queue);

  magma_int_t magma_interp_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_queue_t queue);

  magma_int_t magma_interp_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // Dimension-generic interpolation (any dim)
  magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q,
                                   magma_int_t dim, magma_int_t ncomp,
                                   const CeedScalar *dT, CeedTransposeMode tmode,
                                   const CeedScalar *dU, magma_int_t u_elemstride,
                                   magma_int_t cstrdU,
                                   CeedScalar *dV, magma_int_t v_elemstride,
                                   magma_int_t cstrdV,
                                   magma_int_t nelem, magma_queue_t queue);

  // Dispatcher: selects generic vs. dimension-specific per kernel_mode
  magma_int_t magma_interp(
    magma_int_t P, magma_int_t Q,
    magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);
115 
  // Tensor-basis gradient kernels.  In addition to the interp parameters:
  //   dTinterp/dinterp1d  1D interpolation matrix
  //   dTgrad/dgrad1d      1D differentiation matrix
  //   dstrdU, dstrdV      stride between derivative directions in dU/dV
  // The gradn_*/gradt_* pairs appear to be the non-transposed/transposed
  // variants for a fixed dimension -- TODO confirm naming convention;
  // _generic handles any dim; magma_grad dispatches based on kernel_mode.
  magma_int_t magma_grad_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_queue_t queue);

  magma_int_t magma_gradn_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  magma_int_t magma_gradt_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  magma_int_t magma_gradn_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  magma_int_t magma_gradt_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // Dimension-generic gradient (any dim)
  magma_int_t magma_grad_generic(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // Dispatcher: selects generic vs. dimension-specific per kernel_mode
  magma_int_t magma_grad(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);
164 
  // Quadrature-weight kernels: write the (tensor-product of the) 1D weights
  // dqweight1d into dV, advancing by v_stride per element, for nelem elements.
  // _1d/_2d/_3d are dimension-specific, _generic takes dim as a parameter,
  // and magma_weight dispatches based on kernel_mode.
  magma_int_t magma_weight_1d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_queue_t queue);

  magma_int_t magma_weight_2d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_queue_t queue);

  magma_int_t magma_weight_3d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_queue_t queue);

  magma_int_t magma_weight_generic(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t vstride,
    magma_int_t nelem, magma_queue_t queue);

  magma_int_t magma_weight(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);

  // Non-tensor weight kernel; grid/threads are the explicit launch dimensions
  void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem,
                              magma_int_t Q,
                              CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue);
195 
  // Element-restriction kernels: copy degrees of freedom between du and dv
  // for nelem elements of esize nodes and NCOMP components.  The Offset
  // variants index through an offsets array (compstride = stride between
  // components); the Strided variants use a strides triple instead.
  // read* presumably gathers L-vector -> E-vector and write* scatters back
  // -- TODO confirm direction against the kernel sources.
  void magma_readDofsOffset(const magma_int_t NCOMP,
                            const magma_int_t compstride,
                            const magma_int_t esize, const magma_int_t nelem,
                            magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv,
                            magma_queue_t queue);

  void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
                             const magma_int_t nelem, magma_int_t *strides,
                             const CeedScalar *du, CeedScalar *dv,
                             magma_queue_t queue);

  void magma_writeDofsOffset(const magma_int_t NCOMP,
                             const magma_int_t compstride,
                             const magma_int_t esize, const magma_int_t nelem,
                             magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv,
                             magma_queue_t queue);

  void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
                              const magma_int_t nelem, magma_int_t *strides,
                              const CeedScalar *du, CeedScalar *dv,
                              magma_queue_t queue);
217 
  // GEMM helpers for non-tensor bases, double and single precision, with the
  // conventional BLAS interface: C = alpha*op(A)*op(B) + beta*C, where op is
  // chosen by transA/transB and ldd* are the leading dimensions.
  int magma_dgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha, const double *dA, magma_int_t ldda,
    const double *dB, magma_int_t lddb,
    double beta,  double *dC, magma_int_t lddc,
    magma_queue_t queue );

  int magma_sgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    float alpha, const float *dA, magma_int_t ldda,
    const float *dB, magma_int_t lddb,
    float beta,  float *dC, magma_int_t lddc,
    magma_queue_t queue );

  // Tests whether A is a device pointer; presumably returns nonzero when it
  // is -- TODO confirm the return convention in the implementation.
  magma_int_t
  magma_isdevptr(const void *A);
236 
  // libCEED backend entry points: install the MAGMA implementations on the
  // given CEED objects.  Return a CeedError code (0 on success).

  // Create a tensor-product H1 basis from the 1D matrices/points/weights
  int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
                                    CeedInt Q1d,
                                    const CeedScalar *interp1d,
                                    const CeedScalar *grad1d,
                                    const CeedScalar *qref1d,
                                    const CeedScalar *qweight1d,
                                    CeedBasis basis);

  // Create a non-tensor H1 basis from the full matrices
  int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
                              CeedInt ndof, CeedInt nqpts,
                              const CeedScalar *interp,
                              const CeedScalar *grad,
                              const CeedScalar *qref,
                              const CeedScalar *qweight,
                              CeedBasis basis);

  // Create an element restriction; cmode controls whether offsets are
  // copied, used in place, or owned (CeedCopyMode semantics)
  int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
                                      CeedCopyMode cmode,
                                      const CeedInt *offsets,
                                      CeedElemRestriction r);

  // Blocked variant of the element restriction constructor
  int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
      const CeedCopyMode cmode,
      const CeedInt *offsets,
      const CeedElemRestriction res);

  // Install the MAGMA operator implementation on op
  int CeedOperatorCreate_Magma(CeedOperator op);
264 
265   #ifdef __cplusplus
266 }
267   #endif
268 
// Route the backend's device-pointer check to the custom magma_isdevptr
// declared above; comment the line below to use the default
// magma_is_devptr function.
#define magma_is_devptr magma_isdevptr

// if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync
// should do nothing -- hence the macro expands to nothing
#define ceed_magma_queue_sync(...)

// batch stride (elements per kernel batch); override at compile time using
// -DMAGMA_BATCH_STRIDE=<desired-value>
#ifndef MAGMA_BATCH_STRIDE
#define MAGMA_BATCH_STRIDE (1000)
#endif
280 
281 #endif  // _ceed_magma_h
282