xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision 38293ee66094a4bc140c5a2101071dba903c8073)
13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
34444f328STzanio //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
54444f328STzanio //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
74444f328STzanio 
890104f39SStan Tomov // magma functions specific to ceed
994b7b29bSJeremy L Thompson #ifndef CEED_MAGMA_H
1094b7b29bSJeremy L Thompson #define CEED_MAGMA_H
1190104f39SStan Tomov 
1249aac155SJeremy L Thompson #include <ceed.h>
13ec3da8bcSJed Brown #include <ceed/backend.h>
14e0582403Sabdelfattah83 #include <magma_v2.h>
15e0582403Sabdelfattah83 
16f6af633fSnbeams #define MAGMA_MAXTHREADS_1D 128
17f6af633fSnbeams #define MAGMA_MAXTHREADS_2D 128
18f6af633fSnbeams #define MAGMA_MAXTHREADS_3D 64
19023b8a51Sabdelfattah83 #define MAGMA_NONTENSOR_MAXTHREADS (128)
20023b8a51Sabdelfattah83 
21f6af633fSnbeams // Define macro for determining number of threads in y-direction
22f6af633fSnbeams // for basis kernels
23f6af633fSnbeams #define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x)))
24023b8a51Sabdelfattah83 #define MAGMA_NONTENSOR_BASIS_NTCOL(N) (CeedIntMax(1, (MAGMA_NONTENSOR_MAXTHREADS / (N))))
25023b8a51Sabdelfattah83 #define MAGMA_CEILDIV(A, B) (((A) + (B)-1) / (B))
26023b8a51Sabdelfattah83 
27023b8a51Sabdelfattah83 #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P (40)
28023b8a51Sabdelfattah83 #define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q (40)
29023b8a51Sabdelfattah83 
30f6af633fSnbeams // Define macro for computing the total threads in a block
31f6af633fSnbeams // for use with __launch_bounds__()
32f6af633fSnbeams #define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt))
33f6af633fSnbeams 
34023b8a51Sabdelfattah83 // Define macro for non-tensor kernel instances
35023b8a51Sabdelfattah83 #define MAGMA_NONTENSOR_KERNEL_INSTANCES (5)
36023b8a51Sabdelfattah83 #define MAGMA_NONTENSOR_N_VALUES 10240, 51200, 102400, 512000, 1024000
37023b8a51Sabdelfattah83 
38e5f091ebSnbeams #ifdef CEED_MAGMA_USE_HIP
39c42f38b1Snbeams typedef hipModule_t   CeedMagmaModule;
40c42f38b1Snbeams typedef hipFunction_t CeedMagmaFunction;
41eb7e6cafSJeremy L Thompson #define CeedCompileMagma CeedCompile_Hip
42eb7e6cafSJeremy L Thompson #define CeedGetKernelMagma CeedGetKernel_Hip
43eb7e6cafSJeremy L Thompson #define CeedRunKernelMagma CeedRunKernel_Hip
44eb7e6cafSJeremy L Thompson #define CeedRunKernelDimMagma CeedRunKernelDim_Hip
45eb7e6cafSJeremy L Thompson #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Hip
46f6af633fSnbeams #else
47c42f38b1Snbeams typedef CUmodule   CeedMagmaModule;
48c42f38b1Snbeams typedef CUfunction CeedMagmaFunction;
49eb7e6cafSJeremy L Thompson #define CeedCompileMagma CeedCompile_Cuda
50eb7e6cafSJeremy L Thompson #define CeedGetKernelMagma CeedGetKernel_Cuda
51eb7e6cafSJeremy L Thompson #define CeedRunKernelMagma CeedRunKernel_Cuda
52eb7e6cafSJeremy L Thompson #define CeedRunKernelDimMagma CeedRunKernelDim_Cuda
53eb7e6cafSJeremy L Thompson #define CeedRunKernelDimSharedMagma CeedRunKernelDimShared_Cuda
54f6af633fSnbeams #endif
55f6af633fSnbeams 
56e0582403Sabdelfattah83 typedef struct {
57c42f38b1Snbeams   CeedMagmaModule   module;
58c42f38b1Snbeams   CeedMagmaFunction magma_interp;
59c42f38b1Snbeams   CeedMagmaFunction magma_interp_tr;
60c42f38b1Snbeams   CeedMagmaFunction magma_grad;
61c42f38b1Snbeams   CeedMagmaFunction magma_grad_tr;
62c42f38b1Snbeams   CeedMagmaFunction magma_weight;
63*38293ee6SJeremy L Thompson   CeedScalar       *d_q_ref_1d;
64*38293ee6SJeremy L Thompson   CeedScalar       *d_interp_1d;
65*38293ee6SJeremy L Thompson   CeedScalar       *d_grad_1d;
66*38293ee6SJeremy L Thompson   CeedScalar       *d_q_weight_1d;
677f5b9731SStan Tomov } CeedBasis_Magma;
687f5b9731SStan Tomov 
697f5b9731SStan Tomov typedef struct {
70023b8a51Sabdelfattah83   CeedMagmaModule   module[MAGMA_NONTENSOR_KERNEL_INSTANCES];
71023b8a51Sabdelfattah83   CeedMagmaFunction magma_interp_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
72023b8a51Sabdelfattah83   CeedMagmaFunction magma_interp_tr_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
73023b8a51Sabdelfattah83   CeedMagmaFunction magma_grad_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
74023b8a51Sabdelfattah83   CeedMagmaFunction magma_grad_tr_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
75*38293ee6SJeremy L Thompson   CeedScalar       *d_q_ref;
76*38293ee6SJeremy L Thompson   CeedScalar       *d_interp;
77*38293ee6SJeremy L Thompson   CeedScalar       *d_grad;
78*38293ee6SJeremy L Thompson   CeedScalar       *d_q_weight;
79868539c2SNatalie Beams } CeedBasisNonTensor_Magma;
80868539c2SNatalie Beams 
81*38293ee6SJeremy L Thompson CEED_INTERN void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t num_elem, magma_int_t Q, CeedScalar *d_q_weight,
82*38293ee6SJeremy L Thompson                                         CeedScalar *d_v, magma_queue_t queue);
83e0582403Sabdelfattah83 
84*38293ee6SJeremy L Thompson CEED_INTERN int magma_gemm_nontensor(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k, CeedScalar alpha,
85*38293ee6SJeremy L Thompson                                      const CeedScalar *d_A, magma_int_t ldda, const CeedScalar *d_B, magma_int_t lddb, CeedScalar beta,
86*38293ee6SJeremy L Thompson                                      CeedScalar *d_C, magma_int_t lddc, magma_queue_t queue);
872b730f8bSJeremy L Thompson 
88*38293ee6SJeremy L Thompson CEED_INTERN void gemm_selector(int gpu_arch, char precision, char trans_A, int m, int n, int k, int *n_batch, int *use_magma);
892dc3fb5fSabdelfattah83 
90*38293ee6SJeremy L Thompson CEED_INTERN CeedInt nontensor_rtc_get_nb(int gpu_arch, char precision, CeedEvalMode e_mode, CeedTransposeMode t_mode, int P_, int N, int Q_);
91023b8a51Sabdelfattah83 
922b730f8bSJeremy L Thompson CEED_INTERN magma_int_t magma_isdevptr(const void *A);
937f5b9731SStan Tomov 
94*38293ee6SJeremy L Thompson CEED_INTERN int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
95*38293ee6SJeremy L Thompson                                               const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis);
967f5b9731SStan Tomov 
97*38293ee6SJeremy L Thompson CEED_INTERN int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_dof, CeedInt num_qpts, const CeedScalar *interp,
98*38293ee6SJeremy L Thompson                                         const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis);
99868539c2SNatalie Beams 
10058549094SSebastian Grimberg // Comment the line below to use the default magma_is_devptr function
1017f5b9731SStan Tomov #define magma_is_devptr magma_isdevptr
1027f5b9731SStan Tomov 
10358549094SSebastian Grimberg // If magma and cuda/ref are using the null stream, then ceed_magma_queue_sync should do nothing
104e0582403Sabdelfattah83 #define ceed_magma_queue_sync(...)
105e0582403Sabdelfattah83 
10694b7b29bSJeremy L Thompson #endif  // CEED_MAGMA_H
107