xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision 2b730f8b5a9c809740a0b3b302db43a719c636b1)
13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
34444f328STzanio //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
54444f328STzanio //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
74444f328STzanio 
890104f39SStan Tomov // magma functions specific to ceed
9972b3d9dSNatalie Beams #ifndef _ceed_magma_h
103d576824SJeremy L Thompson #define _ceed_magma_h
1190104f39SStan Tomov 
12ec3da8bcSJed Brown #include <ceed/backend.h>
13*2b730f8bSJeremy L Thompson #include <ceed/ceed.h>
14e0582403Sabdelfattah83 #include <magma_v2.h>
15e0582403Sabdelfattah83 
16f6af633fSnbeams #define MAGMA_MAXTHREADS_1D 128
17f6af633fSnbeams #define MAGMA_MAXTHREADS_2D 128
18f6af633fSnbeams #define MAGMA_MAXTHREADS_3D 64
19f6af633fSnbeams // Define macro for determining number of threads in y-direction
20f6af633fSnbeams // for basis kernels
21f6af633fSnbeams #define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x)))
22f6af633fSnbeams // Define macro for computing the total threads in a block
23f6af633fSnbeams // for use with __launch_bounds__()
24f6af633fSnbeams #define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt))
25f6af633fSnbeams 
26e5f091ebSnbeams #ifdef CEED_MAGMA_USE_HIP
27c42f38b1Snbeams typedef hipModule_t   CeedMagmaModule;
28c42f38b1Snbeams typedef hipFunction_t CeedMagmaFunction;
29c42f38b1Snbeams #define CeedCompileMagma CeedCompileHip
30c42f38b1Snbeams #define CeedGetKernelMagma CeedGetKernelHip
31c42f38b1Snbeams #define CeedRunKernelMagma CeedRunKernelHip
32c42f38b1Snbeams #define CeedRunKernelDimMagma CeedRunKernelDimHip
33c42f38b1Snbeams #define CeedRunKernelDimSharedMagma CeedRunKernelDimSharedHip
34f6af633fSnbeams #else
35c42f38b1Snbeams typedef CUmodule   CeedMagmaModule;
36c42f38b1Snbeams typedef CUfunction CeedMagmaFunction;
37c42f38b1Snbeams #define CeedCompileMagma CeedCompileCuda
38c42f38b1Snbeams #define CeedGetKernelMagma CeedGetKernelCuda
39c42f38b1Snbeams #define CeedRunKernelMagma CeedRunKernelCuda
40c42f38b1Snbeams #define CeedRunKernelDimMagma CeedRunKernelDimCuda
41c42f38b1Snbeams #define CeedRunKernelDimSharedMagma CeedRunKernelDimSharedCuda
42f6af633fSnbeams #endif
43f6af633fSnbeams 
44*2b730f8bSJeremy L Thompson typedef enum { MAGMA_KERNEL_DIM_GENERIC = 101, MAGMA_KERNEL_DIM_SPECIFIC = 102 } magma_kernel_mode_t;
45e0582403Sabdelfattah83 
46e0582403Sabdelfattah83 typedef struct {
47e0582403Sabdelfattah83   magma_kernel_mode_t basis_kernel_mode;
48e0582403Sabdelfattah83   magma_device_t      device;
49e0582403Sabdelfattah83   magma_queue_t       queue;
50e0582403Sabdelfattah83 } Ceed_Magma;
515a9ca9adSVeselin Dobrev 
527f5b9731SStan Tomov typedef struct {
53c42f38b1Snbeams   CeedMagmaModule   module;
54c42f38b1Snbeams   CeedMagmaFunction magma_interp;
55c42f38b1Snbeams   CeedMagmaFunction magma_interp_tr;
56c42f38b1Snbeams   CeedMagmaFunction magma_grad;
57c42f38b1Snbeams   CeedMagmaFunction magma_grad_tr;
58c42f38b1Snbeams   CeedMagmaFunction magma_weight;
597f5b9731SStan Tomov   CeedScalar       *dqref1d;
607f5b9731SStan Tomov   CeedScalar       *dinterp1d;
617f5b9731SStan Tomov   CeedScalar       *dgrad1d;
627f5b9731SStan Tomov   CeedScalar       *dqweight1d;
637f5b9731SStan Tomov } CeedBasis_Magma;
647f5b9731SStan Tomov 
657f5b9731SStan Tomov typedef struct {
66868539c2SNatalie Beams   CeedScalar *dqref;
67868539c2SNatalie Beams   CeedScalar *dinterp;
68868539c2SNatalie Beams   CeedScalar *dgrad;
69868539c2SNatalie Beams   CeedScalar *dqweight;
70868539c2SNatalie Beams } CeedBasisNonTensor_Magma;
71868539c2SNatalie Beams 
72c8b3a627SJed Brown typedef enum {
73c8b3a627SJed Brown   OWNED_NONE = 0,
74c8b3a627SJed Brown   OWNED_UNPINNED,
75c8b3a627SJed Brown   OWNED_PINNED,
76c8b3a627SJed Brown } OwnershipMode;
77c8b3a627SJed Brown 
78868539c2SNatalie Beams typedef struct {
79c42f38b1Snbeams   CeedMagmaModule   module;
80c42f38b1Snbeams   CeedMagmaFunction StridedTranspose;
81c42f38b1Snbeams   CeedMagmaFunction StridedNoTranspose;
82c42f38b1Snbeams   CeedMagmaFunction OffsetTranspose;
83c42f38b1Snbeams   CeedMagmaFunction OffsetNoTranspose;
84d655899aSNatalie Beams   CeedInt          *offsets;
85d655899aSNatalie Beams   CeedInt          *doffsets;
86c8b3a627SJed Brown   OwnershipMode     own_;
87868539c2SNatalie Beams   int               down_;  // cover a case where we own Device memory
88868539c2SNatalie Beams } CeedElemRestriction_Magma;
89868539c2SNatalie Beams 
90868539c2SNatalie Beams typedef struct {
917f5b9731SStan Tomov   const CeedScalar **inputs;
927f5b9731SStan Tomov   CeedScalar       **outputs;
937f5b9731SStan Tomov   bool               setupdone;
947f5b9731SStan Tomov } CeedQFunction_Magma;
957f5b9731SStan Tomov 
9690104f39SStan Tomov #define USE_MAGMA_BATCH
9797ee337cSStan Tomov #define USE_MAGMA_BATCH2
987f5b9731SStan Tomov #define USE_MAGMA_BATCH3
997f5b9731SStan Tomov #define USE_MAGMA_BATCH4
10090104f39SStan Tomov 
101*2b730f8bSJeremy L Thompson CEED_INTERN void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem, magma_int_t Q, CeedScalar *dqweight, CeedScalar *dv,
102e0582403Sabdelfattah83                                         magma_queue_t queue);
103e0582403Sabdelfattah83 
104*2b730f8bSJeremy L Thompson CEED_INTERN int magma_dgemm_nontensor(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha,
105*2b730f8bSJeremy L Thompson                                       const double *dA, magma_int_t ldda, const double *dB, magma_int_t lddb, double beta, double *dC,
106*2b730f8bSJeremy L Thompson                                       magma_int_t lddc, magma_queue_t queue);
107*2b730f8bSJeremy L Thompson 
108*2b730f8bSJeremy L Thompson CEED_INTERN int magma_sgemm_nontensor(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha,
109*2b730f8bSJeremy L Thompson                                       const float *dA, magma_int_t ldda, const float *dB, magma_int_t lddb, float beta, float *dC, magma_int_t lddc,
11080a9ef05SNatalie Beams                                       magma_queue_t queue);
11180a9ef05SNatalie Beams 
112*2b730f8bSJeremy L Thompson CEED_INTERN void gemm_selector(int gpu_arch, char precision, char transA, int m, int n, int k, int *nbatch, int *use_magma);
1132dc3fb5fSabdelfattah83 
114*2b730f8bSJeremy L Thompson CEED_INTERN magma_int_t magma_isdevptr(const void *A);
1157f5b9731SStan Tomov 
116*2b730f8bSJeremy L Thompson CEED_INTERN int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, const CeedScalar *interp1d, const CeedScalar *grad1d,
117*2b730f8bSJeremy L Thompson                                               const CeedScalar *qref1d, const CeedScalar *qweight1d, CeedBasis basis);
1187f5b9731SStan Tomov 
119*2b730f8bSJeremy L Thompson CEED_INTERN int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof, CeedInt nqpts, const CeedScalar *interp,
120*2b730f8bSJeremy L Thompson                                         const CeedScalar *grad, const CeedScalar *qref, const CeedScalar *qweight, CeedBasis basis);
121868539c2SNatalie Beams 
122*2b730f8bSJeremy L Thompson CEED_INTERN int CeedElemRestrictionCreate_Magma(CeedMemType mtype, CeedCopyMode cmode, const CeedInt *offsets, CeedElemRestriction r);
123868539c2SNatalie Beams 
124*2b730f8bSJeremy L Thompson CEED_INTERN int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype, const CeedCopyMode cmode, const CeedInt *offsets,
125868539c2SNatalie Beams                                                        const CeedElemRestriction res);
126a8c028e3SNatalie Beams 
1272dc3fb5fSabdelfattah83 CEED_INTERN int CeedOperatorCreate_Magma(CeedOperator op);
1287f5b9731SStan Tomov 
1297f5b9731SStan Tomov // comment the line below to use the default magma_is_devptr function
1307f5b9731SStan Tomov #define magma_is_devptr magma_isdevptr
1317f5b9731SStan Tomov 
132e0582403Sabdelfattah83 // if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync
133e0582403Sabdelfattah83 // should do nothing
134e0582403Sabdelfattah83 #define ceed_magma_queue_sync(...)
135e0582403Sabdelfattah83 
1367f5b9731SStan Tomov // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value>
1377f5b9731SStan Tomov #ifndef MAGMA_BATCH_STRIDE
1387f5b9731SStan Tomov #define MAGMA_BATCH_STRIDE (1000)
1397f5b9731SStan Tomov #endif
140e0582403Sabdelfattah83 
1413d576824SJeremy L Thompson #endif  // _ceed_magma_h
142