// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
164444f328STzanio 1790104f39SStan Tomov // magma functions specific to ceed 18972b3d9dSNatalie Beams #ifndef _ceed_magma_h 193d576824SJeremy L Thompson #define _ceed_magma_h 2090104f39SStan Tomov 21ec3da8bcSJed Brown #include <ceed/ceed.h> 22ec3da8bcSJed Brown #include <ceed/backend.h> 23e0582403Sabdelfattah83 #include <magma_v2.h> 24e0582403Sabdelfattah83 25e0582403Sabdelfattah83 typedef enum { 26e0582403Sabdelfattah83 MAGMA_KERNEL_DIM_GENERIC=101, 27e0582403Sabdelfattah83 MAGMA_KERNEL_DIM_SPECIFIC=102 28e0582403Sabdelfattah83 } magma_kernel_mode_t; 29e0582403Sabdelfattah83 30e0582403Sabdelfattah83 typedef struct { 31e0582403Sabdelfattah83 magma_kernel_mode_t basis_kernel_mode; 32e0582403Sabdelfattah83 magma_int_t maxthreads[3]; 33e0582403Sabdelfattah83 magma_device_t device; 34e0582403Sabdelfattah83 magma_queue_t queue; 35e0582403Sabdelfattah83 } Ceed_Magma; 365a9ca9adSVeselin Dobrev 377f5b9731SStan Tomov typedef struct { 387f5b9731SStan Tomov CeedScalar *dqref1d; 397f5b9731SStan Tomov CeedScalar *dinterp1d; 407f5b9731SStan Tomov CeedScalar *dgrad1d; 417f5b9731SStan Tomov CeedScalar *dqweight1d; 427f5b9731SStan Tomov } CeedBasis_Magma; 437f5b9731SStan Tomov 447f5b9731SStan Tomov typedef struct { 45868539c2SNatalie Beams CeedScalar *dqref; 46868539c2SNatalie Beams CeedScalar *dinterp; 47868539c2SNatalie Beams CeedScalar *dgrad; 48868539c2SNatalie Beams CeedScalar *dqweight; 49868539c2SNatalie Beams } CeedBasisNonTensor_Magma; 50868539c2SNatalie Beams 51*c8b3a627SJed Brown typedef enum { 52*c8b3a627SJed Brown OWNED_NONE = 0, 53*c8b3a627SJed Brown OWNED_UNPINNED, 54*c8b3a627SJed Brown OWNED_PINNED, 55*c8b3a627SJed Brown } OwnershipMode; 56*c8b3a627SJed Brown 57868539c2SNatalie Beams typedef struct { 58d655899aSNatalie Beams CeedInt *offsets; 59d655899aSNatalie Beams CeedInt *doffsets; 60*c8b3a627SJed Brown OwnershipMode own_; 61868539c2SNatalie Beams int down_; // cover a case where we own Device memory 62868539c2SNatalie Beams } CeedElemRestriction_Magma; 
63868539c2SNatalie Beams 64868539c2SNatalie Beams typedef struct { 657f5b9731SStan Tomov const CeedScalar **inputs; 667f5b9731SStan Tomov CeedScalar **outputs; 677f5b9731SStan Tomov bool setupdone; 687f5b9731SStan Tomov } CeedQFunction_Magma; 697f5b9731SStan Tomov 7090104f39SStan Tomov #define USE_MAGMA_BATCH 7197ee337cSStan Tomov #define USE_MAGMA_BATCH2 727f5b9731SStan Tomov #define USE_MAGMA_BATCH3 737f5b9731SStan Tomov #define USE_MAGMA_BATCH4 7490104f39SStan Tomov 757f5b9731SStan Tomov #ifdef __cplusplus 767f5b9731SStan Tomov CEED_INTERN { 777f5b9731SStan Tomov #endif 78e0582403Sabdelfattah83 79e0582403Sabdelfattah83 magma_int_t magma_interp_1d( 80e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 81e0582403Sabdelfattah83 const CeedScalar *dT, CeedTransposeMode tmode, 82e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 83e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 84e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 85e0582403Sabdelfattah83 86e0582403Sabdelfattah83 magma_int_t magma_interp_2d( 87e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 88e0582403Sabdelfattah83 const CeedScalar *dT, CeedTransposeMode tmode, 89e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 90e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 91e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 92e0582403Sabdelfattah83 93e0582403Sabdelfattah83 magma_int_t magma_interp_3d( 94e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 95e0582403Sabdelfattah83 const CeedScalar *dT, CeedTransposeMode tmode, 96e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 97e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 98e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t 
maxthreads, magma_queue_t queue); 99e0582403Sabdelfattah83 100e0582403Sabdelfattah83 magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q, 101868539c2SNatalie Beams magma_int_t dim, magma_int_t ncomp, 10280a9ef05SNatalie Beams const CeedScalar *dT, CeedTransposeMode tmode, 10380a9ef05SNatalie Beams const CeedScalar *dU, magma_int_t u_elemstride, 104e0582403Sabdelfattah83 magma_int_t cstrdU, 10580a9ef05SNatalie Beams CeedScalar *dV, magma_int_t v_elemstride, 106e0582403Sabdelfattah83 magma_int_t cstrdV, 107e0582403Sabdelfattah83 magma_int_t nelem, magma_queue_t queue); 1087f5b9731SStan Tomov 109e0582403Sabdelfattah83 magma_int_t magma_interp( 110e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, 111868539c2SNatalie Beams magma_int_t dim, magma_int_t ncomp, 11280a9ef05SNatalie Beams const CeedScalar *dT, CeedTransposeMode tmode, 11380a9ef05SNatalie Beams const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 11480a9ef05SNatalie Beams CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 115e0582403Sabdelfattah83 magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue); 1167f5b9731SStan Tomov 117e0582403Sabdelfattah83 magma_int_t magma_grad_1d( 118e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 119e0582403Sabdelfattah83 const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode, 120e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 121e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 122e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 123868539c2SNatalie Beams 124e0582403Sabdelfattah83 magma_int_t magma_gradn_2d( 125e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 126e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 127e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, 
magma_int_t cstrdU, magma_int_t dstrdU, 128e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 129e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 130e0582403Sabdelfattah83 131e0582403Sabdelfattah83 magma_int_t magma_gradt_2d( 132e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 133e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 134e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 135e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 136e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 137e0582403Sabdelfattah83 138e0582403Sabdelfattah83 magma_int_t magma_gradn_3d( 139e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 140e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 141e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 142e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 143e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 144e0582403Sabdelfattah83 145e0582403Sabdelfattah83 magma_int_t magma_gradt_3d( 146e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 147e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 148e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 149e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 150e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 151e0582403Sabdelfattah83 152e0582403Sabdelfattah83 magma_int_t 
magma_grad_generic( 153e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp, 154e0582403Sabdelfattah83 const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 155e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 156e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 157e0582403Sabdelfattah83 magma_int_t nelem, magma_queue_t queue); 158e0582403Sabdelfattah83 159e0582403Sabdelfattah83 magma_int_t magma_grad( 160e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp, 161e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 162e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU, 163e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV, 164e0582403Sabdelfattah83 magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue); 165e0582403Sabdelfattah83 166e0582403Sabdelfattah83 magma_int_t magma_weight_1d( 167e0582403Sabdelfattah83 magma_int_t Q, const CeedScalar *dqweight1d, 168e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_stride, 169e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 170e0582403Sabdelfattah83 171e0582403Sabdelfattah83 magma_int_t magma_weight_2d( 172e0582403Sabdelfattah83 magma_int_t Q, const CeedScalar *dqweight1d, 173e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_stride, 174e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 175e0582403Sabdelfattah83 176e0582403Sabdelfattah83 magma_int_t magma_weight_3d( 177e0582403Sabdelfattah83 magma_int_t Q, const CeedScalar *dqweight1d, 178e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_stride, 179e0582403Sabdelfattah83 magma_int_t nelem, 
magma_int_t maxthreads, magma_queue_t queue); 180e0582403Sabdelfattah83 181e0582403Sabdelfattah83 magma_int_t magma_weight_generic( 182e0582403Sabdelfattah83 magma_int_t Q, magma_int_t dim, 183e0582403Sabdelfattah83 const CeedScalar *dqweight1d, 184e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t vstride, 185e0582403Sabdelfattah83 magma_int_t nelem, magma_queue_t queue); 186e0582403Sabdelfattah83 187e0582403Sabdelfattah83 magma_int_t magma_weight( 188e0582403Sabdelfattah83 magma_int_t Q, magma_int_t dim, 189e0582403Sabdelfattah83 const CeedScalar *dqweight1d, 190e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_stride, 191e0582403Sabdelfattah83 magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue); 192e0582403Sabdelfattah83 193e0582403Sabdelfattah83 void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem, 194868539c2SNatalie Beams magma_int_t Q, 19580a9ef05SNatalie Beams CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue); 196e0582403Sabdelfattah83 1978dc8d968Sjeremylt void magma_readDofsOffset(const magma_int_t NCOMP, 1988dc8d968Sjeremylt const magma_int_t compstride, 1998dc8d968Sjeremylt const magma_int_t esize, const magma_int_t nelem, 20080a9ef05SNatalie Beams magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv, 201e0582403Sabdelfattah83 magma_queue_t queue); 202868539c2SNatalie Beams 2038dc8d968Sjeremylt void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, 204266dd7abSnbeams const magma_int_t nelem, magma_int_t *strides, 20580a9ef05SNatalie Beams const CeedScalar *du, CeedScalar *dv, 206e0582403Sabdelfattah83 magma_queue_t queue); 207266dd7abSnbeams 2088dc8d968Sjeremylt void magma_writeDofsOffset(const magma_int_t NCOMP, 2098dc8d968Sjeremylt const magma_int_t compstride, 2108dc8d968Sjeremylt const magma_int_t esize, const magma_int_t nelem, 21180a9ef05SNatalie Beams magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv, 
212e0582403Sabdelfattah83 magma_queue_t queue); 213868539c2SNatalie Beams 2148dc8d968Sjeremylt void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, 215266dd7abSnbeams const magma_int_t nelem, magma_int_t *strides, 21680a9ef05SNatalie Beams const CeedScalar *du, CeedScalar *dv, 217e0582403Sabdelfattah83 magma_queue_t queue); 218e0582403Sabdelfattah83 219e0582403Sabdelfattah83 int magma_dgemm_nontensor( 220e0582403Sabdelfattah83 magma_trans_t transA, magma_trans_t transB, 221e0582403Sabdelfattah83 magma_int_t m, magma_int_t n, magma_int_t k, 222e0582403Sabdelfattah83 double alpha, const double *dA, magma_int_t ldda, 223e0582403Sabdelfattah83 const double *dB, magma_int_t lddb, 224e0582403Sabdelfattah83 double beta, double *dC, magma_int_t lddc, 225e0582403Sabdelfattah83 magma_queue_t queue ); 226e0582403Sabdelfattah83 22780a9ef05SNatalie Beams int magma_sgemm_nontensor( 22880a9ef05SNatalie Beams magma_trans_t transA, magma_trans_t transB, 22980a9ef05SNatalie Beams magma_int_t m, magma_int_t n, magma_int_t k, 23080a9ef05SNatalie Beams float alpha, const float *dA, magma_int_t ldda, 23180a9ef05SNatalie Beams const float *dB, magma_int_t lddb, 23280a9ef05SNatalie Beams float beta, float *dC, magma_int_t lddc, 23380a9ef05SNatalie Beams magma_queue_t queue ); 23480a9ef05SNatalie Beams 2357f5b9731SStan Tomov magma_int_t 2367f5b9731SStan Tomov magma_isdevptr(const void *A); 2377f5b9731SStan Tomov 238868539c2SNatalie Beams int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, 239868539c2SNatalie Beams CeedInt Q1d, 240868539c2SNatalie Beams const CeedScalar *interp1d, 241868539c2SNatalie Beams const CeedScalar *grad1d, 242868539c2SNatalie Beams const CeedScalar *qref1d, 243868539c2SNatalie Beams const CeedScalar *qweight1d, 244868539c2SNatalie Beams CeedBasis basis); 2457f5b9731SStan Tomov 246868539c2SNatalie Beams int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, 247d4f68153Sjeremylt CeedInt ndof, CeedInt nqpts, 
248d4f68153Sjeremylt const CeedScalar *interp, 249d4f68153Sjeremylt const CeedScalar *grad, 250d4f68153Sjeremylt const CeedScalar *qref, 251d4f68153Sjeremylt const CeedScalar *qweight, 252d4f68153Sjeremylt CeedBasis basis); 253868539c2SNatalie Beams 254868539c2SNatalie Beams int CeedElemRestrictionCreate_Magma(CeedMemType mtype, 255868539c2SNatalie Beams CeedCopyMode cmode, 256d655899aSNatalie Beams const CeedInt *offsets, 257868539c2SNatalie Beams CeedElemRestriction r); 258868539c2SNatalie Beams 259868539c2SNatalie Beams int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype, 260868539c2SNatalie Beams const CeedCopyMode cmode, 261d655899aSNatalie Beams const CeedInt *offsets, 262868539c2SNatalie Beams const CeedElemRestriction res); 263a8c028e3SNatalie Beams 264a8c028e3SNatalie Beams int CeedOperatorCreate_Magma(CeedOperator op); 265a8c028e3SNatalie Beams 2667f5b9731SStan Tomov #ifdef __cplusplus 2677f5b9731SStan Tomov } 2687f5b9731SStan Tomov #endif 2697f5b9731SStan Tomov 2707f5b9731SStan Tomov // comment the line below to use the default magma_is_devptr function 2717f5b9731SStan Tomov #define magma_is_devptr magma_isdevptr 2727f5b9731SStan Tomov 273e0582403Sabdelfattah83 // if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync 274e0582403Sabdelfattah83 // should do nothing 275e0582403Sabdelfattah83 #define ceed_magma_queue_sync(...) 276e0582403Sabdelfattah83 2777f5b9731SStan Tomov // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value> 2787f5b9731SStan Tomov #ifndef MAGMA_BATCH_STRIDE 2797f5b9731SStan Tomov #define MAGMA_BATCH_STRIDE (1000) 2807f5b9731SStan Tomov #endif 281e0582403Sabdelfattah83 2823d576824SJeremy L Thompson #endif // _ceed_magma_h 283