// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
// magma functions specific to ceed
// NOTE(review): the guard name `_ceed_magma_h` starts with an underscore and is
// technically reserved to the implementation; confirm with the project before
// renaming, since sibling files may test this exact macro.
#ifndef _ceed_magma_h
#define _ceed_magma_h

#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <magma_v2.h>

// Selects between dimension-generic kernels and kernels specialized for a
// particular spatial dimension (consumed by the magma_interp/grad/weight
// dispatch routines below).
typedef enum {
  MAGMA_KERNEL_DIM_GENERIC=101,
  MAGMA_KERNEL_DIM_SPECIFIC=102
} magma_kernel_mode_t;

// Per-Ceed backend state: which kernel flavor to launch, and the MAGMA
// device/queue all kernels in this backend are submitted to.
typedef struct {
  magma_kernel_mode_t basis_kernel_mode;
  magma_device_t device;
  magma_queue_t queue;
} Ceed_Magma;

// Tensor-product basis data. The `d` prefix follows MAGMA convention for
// device-resident pointers — presumably these are copies of the 1D reference
// coordinates, interpolation, gradient, and quadrature-weight matrices;
// confirm against the basis constructor implementation.
typedef struct {
  CeedScalar *dqref1d;
  CeedScalar *dinterp1d;
  CeedScalar *dgrad1d;
  CeedScalar *dqweight1d;
} CeedBasis_Magma;

// Non-tensor (H1) basis data; same fields as CeedBasis_Magma but without the
// 1D tensor-factorization suffix.
typedef struct {
  CeedScalar *dqref;
  CeedScalar *dinterp;
  CeedScalar *dgrad;
  CeedScalar *dqweight;
} CeedBasisNonTensor_Magma;

// Ownership of the host offsets array in CeedElemRestriction_Magma:
// not owned, owned as pageable host memory, or owned as pinned host memory.
typedef enum {
  OWNED_NONE = 0,
  OWNED_UNPINNED,
  OWNED_PINNED,
} OwnershipMode;

// Element-restriction data: host offsets plus a device copy (doffsets).
typedef struct {
  CeedInt *offsets;        // host copy of the element-to-dof offsets
  CeedInt *doffsets;       // device copy of the offsets
  OwnershipMode own_;      // ownership of the host `offsets` array
  int down_;               // cover a case where we own Device memory
} CeedElemRestriction_Magma;

// QFunction state: staged input/output field pointers and a setup flag.
typedef struct {
  const CeedScalar **inputs;
  CeedScalar **outputs;
  bool setupdone;
} CeedQFunction_Magma;

// Feature toggles for batched kernel variants (checked elsewhere via #ifdef).
#define USE_MAGMA_BATCH
#define USE_MAGMA_BATCH2
#define USE_MAGMA_BATCH3
#define USE_MAGMA_BATCH4

#ifdef __cplusplus
CEED_INTERN {
#endif

// ---------------------------------------------------------------------------
// Interpolation kernels (basis application at quadrature points).
// Naming of strides: estrd* = element stride, cstrd* = component stride;
// P/Q are the 1D basis/quadrature sizes, ncomp the number of field
// components, and tmode selects application of T or its transpose.
// ---------------------------------------------------------------------------

// Dimension-specific interpolation, 1D elements.
magma_int_t magma_interp_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_queue_t queue);

// Dimension-specific interpolation, 2D elements.
magma_int_t magma_interp_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_queue_t queue);

// Dimension-specific interpolation, 3D elements.
magma_int_t magma_interp_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_queue_t queue);

// Dimension-generic interpolation (any dim), used when the kernel mode is
// MAGMA_KERNEL_DIM_GENERIC.
magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q,
                                 magma_int_t dim, magma_int_t ncomp,
                                 const CeedScalar *dT, CeedTransposeMode tmode,
                                 const CeedScalar *dU, magma_int_t u_elemstride,
                                 magma_int_t cstrdU,
                                 CeedScalar *dV, magma_int_t v_elemstride,
                                 magma_int_t cstrdV,
                                 magma_int_t nelem, magma_queue_t queue);

// Dispatch: routes to the generic or the dim-specific interpolation kernel
// according to kernel_mode.
magma_int_t magma_interp(
    magma_int_t P, magma_int_t Q,
    magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);

// ---------------------------------------------------------------------------
// Gradient kernels. dstrd* = dimension (derivative-direction) stride.
// The gradn_/gradt_ pairs presumably handle the no-transpose / transpose
// cases separately — confirm against the kernel implementations.
// ---------------------------------------------------------------------------

// Gradient, 1D elements (single derivative direction, so no dstrd).
magma_int_t magma_grad_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_queue_t queue);

// Gradient (non-transpose path), 2D elements.
magma_int_t magma_gradn_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

// Gradient (transpose path), 2D elements.
magma_int_t magma_gradt_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

// Gradient (non-transpose path), 3D elements.
magma_int_t magma_gradn_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

// Gradient (transpose path), 3D elements.
magma_int_t magma_gradt_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

// Dimension-generic gradient kernel.
magma_int_t magma_grad_generic(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

// Dispatch: routes to the generic or the dim-specific gradient kernels
// according to kernel_mode.
magma_int_t magma_grad(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);

// ---------------------------------------------------------------------------
// Quadrature-weight kernels: write the tensor-product quadrature weights
// into dV for each of the nelem elements (v_stride apart).
// ---------------------------------------------------------------------------

// Quadrature weights, 1D elements.
magma_int_t magma_weight_1d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_queue_t queue);

// Quadrature weights, 2D elements.
magma_int_t magma_weight_2d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_queue_t queue);

// Quadrature weights, 3D elements.
magma_int_t magma_weight_3d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_queue_t queue);

// Dimension-generic quadrature-weight kernel.
magma_int_t magma_weight_generic(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t vstride,
    magma_int_t nelem, magma_queue_t queue);

// Dispatch: routes to the generic or the dim-specific weight kernel
// according to kernel_mode.
magma_int_t magma_weight(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);

// Quadrature weights for non-tensor bases; caller supplies the launch
// geometry (grid, threads) explicitly.
void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem,
                            magma_int_t Q,
                            CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue);

// ---------------------------------------------------------------------------
// Element-restriction kernels: gather (read) L-vector dofs into E-vector
// layout and scatter (write) them back, using either an explicit offsets
// array or a strided layout.
// ---------------------------------------------------------------------------

// Gather via offsets: du (L-vector) -> dv (E-vector), NCOMP components with
// component stride compstride, esize dofs per element.
void magma_readDofsOffset(const magma_int_t NCOMP,
                          const magma_int_t compstride,
                          const magma_int_t esize, const magma_int_t nelem,
                          magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv,
                          magma_queue_t queue);

// Gather via strided layout: du (L-vector) -> dv (E-vector).
void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
                           const magma_int_t nelem, magma_int_t *strides,
                           const CeedScalar *du, CeedScalar *dv,
                           magma_queue_t queue);

// Scatter via offsets: du (E-vector) -> dv (L-vector).
void magma_writeDofsOffset(const magma_int_t NCOMP,
                           const magma_int_t compstride,
                           const magma_int_t esize, const magma_int_t nelem,
                           magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv,
                           magma_queue_t queue);

// Scatter via strided layout: du (E-vector) -> dv (L-vector).
void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
                            const magma_int_t nelem, magma_int_t *strides,
                            const CeedScalar *du, CeedScalar *dv,
                            magma_queue_t queue);

// GEMM wrappers used by the non-tensor basis path (double precision).
int magma_dgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha, const double *dA, magma_int_t ldda,
    const double *dB, magma_int_t lddb,
    double beta, double *dC, magma_int_t lddc,
    magma_queue_t queue );

// GEMM wrapper used by the non-tensor basis path (single precision).
int magma_sgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    float alpha, const float *dA, magma_int_t ldda,
    const float *dB, magma_int_t lddb,
    float beta, float *dC, magma_int_t lddc,
    magma_queue_t queue );

// Returns whether A is a device pointer (aliased below as magma_is_devptr).
magma_int_t
magma_isdevptr(const void *A);

// ---------------------------------------------------------------------------
// Backend constructor entry points registered with the libCEED core.
// ---------------------------------------------------------------------------

// Create a tensor-product H1 basis on the device from the given 1D matrices.
int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
                                  CeedInt Q1d,
                                  const CeedScalar *interp1d,
                                  const CeedScalar *grad1d,
                                  const CeedScalar *qref1d,
                                  const CeedScalar *qweight1d,
                                  CeedBasis basis);

// Create a non-tensor H1 basis on the device.
int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
                            CeedInt ndof, CeedInt nqpts,
                            const CeedScalar *interp,
                            const CeedScalar *grad,
                            const CeedScalar *qref,
                            const CeedScalar *qweight,
                            CeedBasis basis);

// Create an element restriction from an offsets array; mtype/cmode control
// where the offsets live and whether they are copied or adopted.
int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
                                    CeedCopyMode cmode,
                                    const CeedInt *offsets,
                                    CeedElemRestriction r);

// Blocked variant of the element-restriction constructor.
int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
                                           const CeedCopyMode cmode,
                                           const CeedInt *offsets,
                                           const CeedElemRestriction res);

// Create the MAGMA operator implementation for op.
int CeedOperatorCreate_Magma(CeedOperator op);

#ifdef __cplusplus
}
#endif

// comment the line below to use the default magma_is_devptr function
#define magma_is_devptr magma_isdevptr

// if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync
// should do nothing
#define ceed_magma_queue_sync(...)

// batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value>
#ifndef MAGMA_BATCH_STRIDE
#define MAGMA_BATCH_STRIDE (1000)
#endif

#endif  // _ceed_magma_h