xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision 2dc3fb5f4d99263629ede9783b5752ff8ee2177f)
13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
34444f328STzanio //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
54444f328STzanio //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
74444f328STzanio 
890104f39SStan Tomov // magma functions specific to ceed
9972b3d9dSNatalie Beams #ifndef _ceed_magma_h
103d576824SJeremy L Thompson #define _ceed_magma_h
1190104f39SStan Tomov 
12ec3da8bcSJed Brown #include <ceed/ceed.h>
13ec3da8bcSJed Brown #include <ceed/backend.h>
14e0582403Sabdelfattah83 #include <magma_v2.h>
15e0582403Sabdelfattah83 
16f6af633fSnbeams #define MAGMA_MAXTHREADS_1D 128
17f6af633fSnbeams #define MAGMA_MAXTHREADS_2D 128
18f6af633fSnbeams #define MAGMA_MAXTHREADS_3D 64
19f6af633fSnbeams // Define macro for determining number of threads in y-direction
20f6af633fSnbeams // for basis kernels
21f6af633fSnbeams #define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x)))
22f6af633fSnbeams // Define macro for computing the total threads in a block
23f6af633fSnbeams // for use with __launch_bounds__()
24f6af633fSnbeams #define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt))
25f6af633fSnbeams 
26e5f091ebSnbeams #ifdef CEED_MAGMA_USE_HIP
27c42f38b1Snbeams typedef hipModule_t CeedMagmaModule;
28c42f38b1Snbeams typedef hipFunction_t CeedMagmaFunction;
29c42f38b1Snbeams #define CeedCompileMagma CeedCompileHip
30c42f38b1Snbeams #define CeedGetKernelMagma CeedGetKernelHip
31c42f38b1Snbeams #define CeedRunKernelMagma CeedRunKernelHip
32c42f38b1Snbeams #define CeedRunKernelDimMagma CeedRunKernelDimHip
33c42f38b1Snbeams #define CeedRunKernelDimSharedMagma CeedRunKernelDimSharedHip
34f6af633fSnbeams #else
35c42f38b1Snbeams typedef CUmodule CeedMagmaModule;
36c42f38b1Snbeams typedef CUfunction CeedMagmaFunction;
37c42f38b1Snbeams #define CeedCompileMagma CeedCompileCuda
38c42f38b1Snbeams #define CeedGetKernelMagma CeedGetKernelCuda
39c42f38b1Snbeams #define CeedRunKernelMagma CeedRunKernelCuda
40c42f38b1Snbeams #define CeedRunKernelDimMagma CeedRunKernelDimCuda
41c42f38b1Snbeams #define CeedRunKernelDimSharedMagma CeedRunKernelDimSharedCuda
42f6af633fSnbeams #endif
43f6af633fSnbeams 
44e0582403Sabdelfattah83 typedef enum {
45e0582403Sabdelfattah83   MAGMA_KERNEL_DIM_GENERIC=101,
46e0582403Sabdelfattah83   MAGMA_KERNEL_DIM_SPECIFIC=102
47e0582403Sabdelfattah83 } magma_kernel_mode_t;
48e0582403Sabdelfattah83 
49e0582403Sabdelfattah83 typedef struct {
50e0582403Sabdelfattah83   magma_kernel_mode_t basis_kernel_mode;
51e0582403Sabdelfattah83   magma_device_t device;
52e0582403Sabdelfattah83   magma_queue_t queue;
53e0582403Sabdelfattah83 } Ceed_Magma;
545a9ca9adSVeselin Dobrev 
557f5b9731SStan Tomov typedef struct {
56c42f38b1Snbeams   CeedMagmaModule module;
57c42f38b1Snbeams   CeedMagmaFunction magma_interp;
58c42f38b1Snbeams   CeedMagmaFunction magma_interp_tr;
59c42f38b1Snbeams   CeedMagmaFunction magma_grad;
60c42f38b1Snbeams   CeedMagmaFunction magma_grad_tr;
61c42f38b1Snbeams   CeedMagmaFunction magma_weight;
627f5b9731SStan Tomov   CeedScalar *dqref1d;
637f5b9731SStan Tomov   CeedScalar *dinterp1d;
647f5b9731SStan Tomov   CeedScalar *dgrad1d;
657f5b9731SStan Tomov   CeedScalar *dqweight1d;
667f5b9731SStan Tomov } CeedBasis_Magma;
677f5b9731SStan Tomov 
687f5b9731SStan Tomov typedef struct {
69868539c2SNatalie Beams   CeedScalar *dqref;
70868539c2SNatalie Beams   CeedScalar *dinterp;
71868539c2SNatalie Beams   CeedScalar *dgrad;
72868539c2SNatalie Beams   CeedScalar *dqweight;
73868539c2SNatalie Beams } CeedBasisNonTensor_Magma;
74868539c2SNatalie Beams 
75c8b3a627SJed Brown typedef enum {
76c8b3a627SJed Brown   OWNED_NONE = 0,
77c8b3a627SJed Brown   OWNED_UNPINNED,
78c8b3a627SJed Brown   OWNED_PINNED,
79c8b3a627SJed Brown } OwnershipMode;
80c8b3a627SJed Brown 
81868539c2SNatalie Beams typedef struct {
82c42f38b1Snbeams   CeedMagmaModule module;
83c42f38b1Snbeams   CeedMagmaFunction StridedTranspose;
84c42f38b1Snbeams   CeedMagmaFunction StridedNoTranspose;
85c42f38b1Snbeams   CeedMagmaFunction OffsetTranspose;
86c42f38b1Snbeams   CeedMagmaFunction OffsetNoTranspose;
87d655899aSNatalie Beams   CeedInt *offsets;
88d655899aSNatalie Beams   CeedInt *doffsets;
89c8b3a627SJed Brown   OwnershipMode own_;
90868539c2SNatalie Beams   int down_;            // cover a case where we own Device memory
91868539c2SNatalie Beams } CeedElemRestriction_Magma;
92868539c2SNatalie Beams 
93868539c2SNatalie Beams typedef struct {
947f5b9731SStan Tomov   const CeedScalar **inputs;
957f5b9731SStan Tomov   CeedScalar **outputs;
967f5b9731SStan Tomov   bool setupdone;
977f5b9731SStan Tomov } CeedQFunction_Magma;
987f5b9731SStan Tomov 
9990104f39SStan Tomov #define USE_MAGMA_BATCH
10097ee337cSStan Tomov #define USE_MAGMA_BATCH2
1017f5b9731SStan Tomov #define USE_MAGMA_BATCH3
1027f5b9731SStan Tomov #define USE_MAGMA_BATCH4
10390104f39SStan Tomov 
104*2dc3fb5fSabdelfattah83 CEED_INTERN  void
105*2dc3fb5fSabdelfattah83 magma_weight_nontensor(
106*2dc3fb5fSabdelfattah83   magma_int_t grid, magma_int_t threads, magma_int_t nelem,
107868539c2SNatalie Beams   magma_int_t Q,
10880a9ef05SNatalie Beams   CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue);
109e0582403Sabdelfattah83 
110*2dc3fb5fSabdelfattah83 CEED_INTERN  int
111*2dc3fb5fSabdelfattah83 magma_dgemm_nontensor(
112e0582403Sabdelfattah83   magma_trans_t transA, magma_trans_t transB,
113e0582403Sabdelfattah83   magma_int_t m, magma_int_t n, magma_int_t k,
114e0582403Sabdelfattah83   double alpha, const double *dA, magma_int_t ldda,
115e0582403Sabdelfattah83   const double *dB, magma_int_t lddb,
116e0582403Sabdelfattah83   double beta,  double *dC, magma_int_t lddc,
117e0582403Sabdelfattah83   magma_queue_t queue );
118e0582403Sabdelfattah83 
119*2dc3fb5fSabdelfattah83 CEED_INTERN  int
120*2dc3fb5fSabdelfattah83 magma_sgemm_nontensor(
12180a9ef05SNatalie Beams   magma_trans_t transA, magma_trans_t transB,
12280a9ef05SNatalie Beams   magma_int_t m, magma_int_t n, magma_int_t k,
12380a9ef05SNatalie Beams   float alpha, const float *dA, magma_int_t ldda,
12480a9ef05SNatalie Beams   const float *dB, magma_int_t lddb,
12580a9ef05SNatalie Beams   float beta,  float *dC, magma_int_t lddc,
12680a9ef05SNatalie Beams   magma_queue_t queue );
12780a9ef05SNatalie Beams 
128*2dc3fb5fSabdelfattah83 CEED_INTERN  void
129*2dc3fb5fSabdelfattah83 gemm_selector(
130*2dc3fb5fSabdelfattah83   int gpu_arch,
131*2dc3fb5fSabdelfattah83   char precision, char transA,
132*2dc3fb5fSabdelfattah83   int m, int n, int k,
133*2dc3fb5fSabdelfattah83   int *nbatch, int *use_magma );
134*2dc3fb5fSabdelfattah83 
135*2dc3fb5fSabdelfattah83 CEED_INTERN  magma_int_t
1367f5b9731SStan Tomov magma_isdevptr(const void *A);
1377f5b9731SStan Tomov 
138*2dc3fb5fSabdelfattah83 CEED_INTERN  int
139*2dc3fb5fSabdelfattah83 CeedBasisCreateTensorH1_Magma(
140*2dc3fb5fSabdelfattah83   CeedInt dim, CeedInt P1d,
141868539c2SNatalie Beams   CeedInt Q1d,
142868539c2SNatalie Beams   const CeedScalar *interp1d,
143868539c2SNatalie Beams   const CeedScalar *grad1d,
144868539c2SNatalie Beams   const CeedScalar *qref1d,
145868539c2SNatalie Beams   const CeedScalar *qweight1d,
146868539c2SNatalie Beams   CeedBasis basis);
1477f5b9731SStan Tomov 
148*2dc3fb5fSabdelfattah83 CEED_INTERN  int
149*2dc3fb5fSabdelfattah83 CeedBasisCreateH1_Magma(
150*2dc3fb5fSabdelfattah83   CeedElemTopology topo, CeedInt dim,
151d4f68153Sjeremylt   CeedInt ndof, CeedInt nqpts,
152d4f68153Sjeremylt   const CeedScalar *interp,
153d4f68153Sjeremylt   const CeedScalar *grad,
154d4f68153Sjeremylt   const CeedScalar *qref,
155d4f68153Sjeremylt   const CeedScalar *qweight,
156d4f68153Sjeremylt   CeedBasis basis);
157868539c2SNatalie Beams 
158*2dc3fb5fSabdelfattah83 CEED_INTERN  int
159*2dc3fb5fSabdelfattah83 CeedElemRestrictionCreate_Magma(
160*2dc3fb5fSabdelfattah83   CeedMemType mtype,
161868539c2SNatalie Beams   CeedCopyMode cmode,
162d655899aSNatalie Beams   const CeedInt *offsets,
163868539c2SNatalie Beams   CeedElemRestriction r);
164868539c2SNatalie Beams 
165*2dc3fb5fSabdelfattah83 CEED_INTERN  int
166*2dc3fb5fSabdelfattah83 CeedElemRestrictionCreateBlocked_Magma(
167*2dc3fb5fSabdelfattah83   const CeedMemType mtype,
168868539c2SNatalie Beams   const CeedCopyMode cmode,
169d655899aSNatalie Beams   const CeedInt *offsets,
170868539c2SNatalie Beams   const CeedElemRestriction res);
171a8c028e3SNatalie Beams 
172*2dc3fb5fSabdelfattah83 CEED_INTERN  int CeedOperatorCreate_Magma(CeedOperator op);
1737f5b9731SStan Tomov 
1747f5b9731SStan Tomov // comment the line below to use the default magma_is_devptr function
1757f5b9731SStan Tomov #define magma_is_devptr magma_isdevptr
1767f5b9731SStan Tomov 
177e0582403Sabdelfattah83 // if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync
178e0582403Sabdelfattah83 // should do nothing
179e0582403Sabdelfattah83 #define ceed_magma_queue_sync(...)
180e0582403Sabdelfattah83 
1817f5b9731SStan Tomov // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value>
1827f5b9731SStan Tomov #ifndef MAGMA_BATCH_STRIDE
1837f5b9731SStan Tomov #define MAGMA_BATCH_STRIDE (1000)
1847f5b9731SStan Tomov #endif
185e0582403Sabdelfattah83 
1863d576824SJeremy L Thompson #endif  // _ceed_magma_h
187