// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
164444f328STzanio 1790104f39SStan Tomov // magma functions specific to ceed 18*e0582403Sabdelfattah83 #ifndef CEED_MAGMA_H 19*e0582403Sabdelfattah83 #define CEED_MAGMA_H 2090104f39SStan Tomov 217f5b9731SStan Tomov #include <string.h> 22d863ab9bSjeremylt #include <ceed-backend.h> 23*e0582403Sabdelfattah83 #include <magma_v2.h> 24*e0582403Sabdelfattah83 25*e0582403Sabdelfattah83 typedef enum { 26*e0582403Sabdelfattah83 MAGMA_KERNEL_DIM_GENERIC=101, 27*e0582403Sabdelfattah83 MAGMA_KERNEL_DIM_SPECIFIC=102 28*e0582403Sabdelfattah83 } magma_kernel_mode_t; 29*e0582403Sabdelfattah83 30*e0582403Sabdelfattah83 typedef struct { 31*e0582403Sabdelfattah83 magma_kernel_mode_t basis_kernel_mode; 32*e0582403Sabdelfattah83 magma_int_t maxthreads[3]; 33*e0582403Sabdelfattah83 magma_device_t device; 34*e0582403Sabdelfattah83 magma_queue_t queue; 35*e0582403Sabdelfattah83 } Ceed_Magma; 365a9ca9adSVeselin Dobrev 377f5b9731SStan Tomov typedef struct { 387f5b9731SStan Tomov CeedScalar *dqref1d; 397f5b9731SStan Tomov CeedScalar *dinterp1d; 407f5b9731SStan Tomov CeedScalar *dgrad1d; 417f5b9731SStan Tomov CeedScalar *dqweight1d; 427f5b9731SStan Tomov } CeedBasis_Magma; 437f5b9731SStan Tomov 447f5b9731SStan Tomov typedef struct { 45868539c2SNatalie Beams CeedScalar *dqref; 46868539c2SNatalie Beams CeedScalar *dinterp; 47868539c2SNatalie Beams CeedScalar *dgrad; 48868539c2SNatalie Beams CeedScalar *dqweight; 49868539c2SNatalie Beams } CeedBasisNonTensor_Magma; 50868539c2SNatalie Beams 51868539c2SNatalie Beams typedef struct { 52d655899aSNatalie Beams CeedInt *offsets; 53d655899aSNatalie Beams CeedInt *doffsets; 54868539c2SNatalie Beams int own_; 55868539c2SNatalie Beams int down_; // cover a case where we own Device memory 56868539c2SNatalie Beams } CeedElemRestriction_Magma; 57868539c2SNatalie Beams 58868539c2SNatalie Beams typedef struct { 597f5b9731SStan Tomov const CeedScalar **inputs; 607f5b9731SStan Tomov CeedScalar **outputs; 617f5b9731SStan Tomov bool setupdone; 627f5b9731SStan Tomov 
} CeedQFunction_Magma; 637f5b9731SStan Tomov 6490104f39SStan Tomov #define USE_MAGMA_BATCH 6597ee337cSStan Tomov #define USE_MAGMA_BATCH2 667f5b9731SStan Tomov #define USE_MAGMA_BATCH3 677f5b9731SStan Tomov #define USE_MAGMA_BATCH4 6890104f39SStan Tomov 697f5b9731SStan Tomov #ifdef __cplusplus 707f5b9731SStan Tomov CEED_INTERN { 717f5b9731SStan Tomov #endif 72*e0582403Sabdelfattah83 73*e0582403Sabdelfattah83 magma_int_t magma_interp_1d( 74*e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 75*e0582403Sabdelfattah83 const CeedScalar *dT, CeedTransposeMode tmode, 76*e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 77*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 78*e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 79*e0582403Sabdelfattah83 80*e0582403Sabdelfattah83 magma_int_t magma_interp_2d( 81*e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 82*e0582403Sabdelfattah83 const CeedScalar *dT, CeedTransposeMode tmode, 83*e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 84*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 85*e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 86*e0582403Sabdelfattah83 87*e0582403Sabdelfattah83 magma_int_t magma_interp_3d( 88*e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 89*e0582403Sabdelfattah83 const CeedScalar *dT, CeedTransposeMode tmode, 90*e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 91*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 92*e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 93*e0582403Sabdelfattah83 94*e0582403Sabdelfattah83 magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q, 95868539c2SNatalie Beams magma_int_t dim, magma_int_t 
ncomp, 96868539c2SNatalie Beams const double *dT, CeedTransposeMode tmode, 97868539c2SNatalie Beams const double *dU, magma_int_t u_elemstride, 98*e0582403Sabdelfattah83 magma_int_t cstrdU, 99868539c2SNatalie Beams double *dV, magma_int_t v_elemstride, 100*e0582403Sabdelfattah83 magma_int_t cstrdV, 101*e0582403Sabdelfattah83 magma_int_t nelem, magma_queue_t queue); 1027f5b9731SStan Tomov 103*e0582403Sabdelfattah83 magma_int_t magma_interp( 104*e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, 105868539c2SNatalie Beams magma_int_t dim, magma_int_t ncomp, 106*e0582403Sabdelfattah83 const double *dT, CeedTransposeMode tmode, 107*e0582403Sabdelfattah83 const double *dU, magma_int_t estrdU, magma_int_t cstrdU, 108*e0582403Sabdelfattah83 double *dV, magma_int_t estrdV, magma_int_t cstrdV, 109*e0582403Sabdelfattah83 magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue); 1107f5b9731SStan Tomov 111*e0582403Sabdelfattah83 magma_int_t magma_grad_1d( 112*e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 113*e0582403Sabdelfattah83 const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode, 114*e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 115*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 116*e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 117868539c2SNatalie Beams 118*e0582403Sabdelfattah83 magma_int_t magma_gradn_2d( 119*e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 120*e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 121*e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 122*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 123*e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, 
magma_queue_t queue); 124*e0582403Sabdelfattah83 125*e0582403Sabdelfattah83 magma_int_t magma_gradt_2d( 126*e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 127*e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 128*e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 129*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 130*e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 131*e0582403Sabdelfattah83 132*e0582403Sabdelfattah83 magma_int_t magma_gradn_3d( 133*e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 134*e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 135*e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 136*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 137*e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 138*e0582403Sabdelfattah83 139*e0582403Sabdelfattah83 magma_int_t magma_gradt_3d( 140*e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 141*e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 142*e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 143*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 144*e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 145*e0582403Sabdelfattah83 146*e0582403Sabdelfattah83 magma_int_t magma_grad_generic( 147*e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp, 148*e0582403Sabdelfattah83 const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, 
CeedTransposeMode tmode, 149*e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 150*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 151*e0582403Sabdelfattah83 magma_int_t nelem, magma_queue_t queue); 152*e0582403Sabdelfattah83 153*e0582403Sabdelfattah83 magma_int_t magma_grad( 154*e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp, 155*e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 156*e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU, 157*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV, 158*e0582403Sabdelfattah83 magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue); 159*e0582403Sabdelfattah83 160*e0582403Sabdelfattah83 magma_int_t magma_weight_1d( 161*e0582403Sabdelfattah83 magma_int_t Q, const CeedScalar *dqweight1d, 162*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_stride, 163*e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 164*e0582403Sabdelfattah83 165*e0582403Sabdelfattah83 magma_int_t magma_weight_2d( 166*e0582403Sabdelfattah83 magma_int_t Q, const CeedScalar *dqweight1d, 167*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_stride, 168*e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 169*e0582403Sabdelfattah83 170*e0582403Sabdelfattah83 magma_int_t magma_weight_3d( 171*e0582403Sabdelfattah83 magma_int_t Q, const CeedScalar *dqweight1d, 172*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_stride, 173*e0582403Sabdelfattah83 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 174*e0582403Sabdelfattah83 175*e0582403Sabdelfattah83 magma_int_t magma_weight_generic( 176*e0582403Sabdelfattah83 magma_int_t Q, 
magma_int_t dim, 177*e0582403Sabdelfattah83 const CeedScalar *dqweight1d, 178*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t vstride, 179*e0582403Sabdelfattah83 magma_int_t nelem, magma_queue_t queue); 180*e0582403Sabdelfattah83 181*e0582403Sabdelfattah83 magma_int_t magma_weight( 182*e0582403Sabdelfattah83 magma_int_t Q, magma_int_t dim, 183*e0582403Sabdelfattah83 const CeedScalar *dqweight1d, 184*e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_stride, 185*e0582403Sabdelfattah83 magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue); 186*e0582403Sabdelfattah83 187*e0582403Sabdelfattah83 void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem, 188868539c2SNatalie Beams magma_int_t Q, 189*e0582403Sabdelfattah83 double *dqweight, double *dv, magma_queue_t queue); 190*e0582403Sabdelfattah83 191868539c2SNatalie Beams 1928dc8d968Sjeremylt void magma_readDofsOffset(const magma_int_t NCOMP, 1938dc8d968Sjeremylt const magma_int_t compstride, 1948dc8d968Sjeremylt const magma_int_t esize, const magma_int_t nelem, 195*e0582403Sabdelfattah83 magma_int_t *offsets, const double *du, double *dv, 196*e0582403Sabdelfattah83 magma_queue_t queue); 197868539c2SNatalie Beams 1988dc8d968Sjeremylt void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, 199266dd7abSnbeams const magma_int_t nelem, magma_int_t *strides, 200*e0582403Sabdelfattah83 const double *du, double *dv, 201*e0582403Sabdelfattah83 magma_queue_t queue); 202266dd7abSnbeams 2038dc8d968Sjeremylt void magma_writeDofsOffset(const magma_int_t NCOMP, 2048dc8d968Sjeremylt const magma_int_t compstride, 2058dc8d968Sjeremylt const magma_int_t esize, const magma_int_t nelem, 206*e0582403Sabdelfattah83 magma_int_t *offsets,const double *du, double *dv, 207*e0582403Sabdelfattah83 magma_queue_t queue); 208868539c2SNatalie Beams 2098dc8d968Sjeremylt void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, 
210266dd7abSnbeams const magma_int_t nelem, magma_int_t *strides, 211*e0582403Sabdelfattah83 const double *du, double *dv, 212*e0582403Sabdelfattah83 magma_queue_t queue); 213*e0582403Sabdelfattah83 214*e0582403Sabdelfattah83 int magma_dgemm_nontensor( 215*e0582403Sabdelfattah83 magma_trans_t transA, magma_trans_t transB, 216*e0582403Sabdelfattah83 magma_int_t m, magma_int_t n, magma_int_t k, 217*e0582403Sabdelfattah83 double alpha, const double *dA, magma_int_t ldda, 218*e0582403Sabdelfattah83 const double *dB, magma_int_t lddb, 219*e0582403Sabdelfattah83 double beta, double *dC, magma_int_t lddc, 220*e0582403Sabdelfattah83 magma_queue_t queue ); 221*e0582403Sabdelfattah83 222266dd7abSnbeams 2237f5b9731SStan Tomov magma_int_t 2247f5b9731SStan Tomov magma_isdevptr(const void *A); 2257f5b9731SStan Tomov 226868539c2SNatalie Beams int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, 227868539c2SNatalie Beams CeedInt Q1d, 228868539c2SNatalie Beams const CeedScalar *interp1d, 229868539c2SNatalie Beams const CeedScalar *grad1d, 230868539c2SNatalie Beams const CeedScalar *qref1d, 231868539c2SNatalie Beams const CeedScalar *qweight1d, 232868539c2SNatalie Beams CeedBasis basis); 2337f5b9731SStan Tomov 234868539c2SNatalie Beams int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, 235d4f68153Sjeremylt CeedInt ndof, CeedInt nqpts, 236d4f68153Sjeremylt const CeedScalar *interp, 237d4f68153Sjeremylt const CeedScalar *grad, 238d4f68153Sjeremylt const CeedScalar *qref, 239d4f68153Sjeremylt const CeedScalar *qweight, 240d4f68153Sjeremylt CeedBasis basis); 241868539c2SNatalie Beams 242868539c2SNatalie Beams int CeedElemRestrictionCreate_Magma(CeedMemType mtype, 243868539c2SNatalie Beams CeedCopyMode cmode, 244d655899aSNatalie Beams const CeedInt *offsets, 245868539c2SNatalie Beams CeedElemRestriction r); 246868539c2SNatalie Beams 247868539c2SNatalie Beams int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype, 248868539c2SNatalie Beams const 
CeedCopyMode cmode, 249d655899aSNatalie Beams const CeedInt *offsets, 250868539c2SNatalie Beams const CeedElemRestriction res); 251a8c028e3SNatalie Beams 252a8c028e3SNatalie Beams int CeedOperatorCreate_Magma(CeedOperator op); 253a8c028e3SNatalie Beams 2547f5b9731SStan Tomov #ifdef __cplusplus 2557f5b9731SStan Tomov } 2567f5b9731SStan Tomov #endif 2577f5b9731SStan Tomov 258f68f2f01STzanio #define CeedDebug(...) 2597f5b9731SStan Tomov //#define CeedDebug(format, ...) fprintf(stderr, format, ## __VA_ARGS__) 2607f5b9731SStan Tomov 2617f5b9731SStan Tomov // comment the line below to use the default magma_is_devptr function 2627f5b9731SStan Tomov #define magma_is_devptr magma_isdevptr 2637f5b9731SStan Tomov 264*e0582403Sabdelfattah83 // if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync 265*e0582403Sabdelfattah83 // should do nothing 266*e0582403Sabdelfattah83 #define ceed_magma_queue_sync(...) 267*e0582403Sabdelfattah83 2687f5b9731SStan Tomov // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value> 2697f5b9731SStan Tomov #ifndef MAGMA_BATCH_STRIDE 2707f5b9731SStan Tomov #define MAGMA_BATCH_STRIDE (1000) 2717f5b9731SStan Tomov #endif 272*e0582403Sabdelfattah83 273*e0582403Sabdelfattah83 #endif // CEED_MAGMA_H 274