// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
164444f328STzanio 1790104f39SStan Tomov // magma functions specific to ceed 1890104f39SStan Tomov 197f5b9731SStan Tomov #include <string.h> 20d863ab9bSjeremylt #include <ceed-backend.h> 215a9ca9adSVeselin Dobrev #include "magma.h" 225a9ca9adSVeselin Dobrev 237f5b9731SStan Tomov typedef struct { 247f5b9731SStan Tomov CeedScalar *dqref1d; 257f5b9731SStan Tomov CeedScalar *dinterp1d; 267f5b9731SStan Tomov CeedScalar *dgrad1d; 277f5b9731SStan Tomov CeedScalar *dqweight1d; 287f5b9731SStan Tomov } CeedBasis_Magma; 297f5b9731SStan Tomov 307f5b9731SStan Tomov typedef struct { 31868539c2SNatalie Beams CeedScalar *dqref; 32868539c2SNatalie Beams CeedScalar *dinterp; 33868539c2SNatalie Beams CeedScalar *dgrad; 34868539c2SNatalie Beams CeedScalar *dqweight; 35868539c2SNatalie Beams } CeedBasisNonTensor_Magma; 36868539c2SNatalie Beams 37868539c2SNatalie Beams typedef struct { 38d655899aSNatalie Beams CeedInt *offsets; 39d655899aSNatalie Beams CeedInt *doffsets; 40868539c2SNatalie Beams int own_; 41868539c2SNatalie Beams int down_; // cover a case where we own Device memory 42868539c2SNatalie Beams } CeedElemRestriction_Magma; 43868539c2SNatalie Beams 44868539c2SNatalie Beams typedef struct { 457f5b9731SStan Tomov const CeedScalar **inputs; 467f5b9731SStan Tomov CeedScalar **outputs; 477f5b9731SStan Tomov bool setupdone; 487f5b9731SStan Tomov } CeedQFunction_Magma; 497f5b9731SStan Tomov 5090104f39SStan Tomov #define USE_MAGMA_BATCH 5197ee337cSStan Tomov #define USE_MAGMA_BATCH2 527f5b9731SStan Tomov #define USE_MAGMA_BATCH3 537f5b9731SStan Tomov #define USE_MAGMA_BATCH4 5490104f39SStan Tomov 557f5b9731SStan Tomov #ifdef __cplusplus 567f5b9731SStan Tomov CEED_INTERN { 577f5b9731SStan Tomov #endif 583513a710Sjeremylt void magmablas_dbasis_apply_batched_eval_interp(magma_int_t P, magma_int_t Q, 59868539c2SNatalie Beams magma_int_t dim, magma_int_t ncomp, 60868539c2SNatalie Beams const double *dT, CeedTransposeMode tmode, 61868539c2SNatalie Beams const double *dU, magma_int_t 
u_elemstride, 62868539c2SNatalie Beams magma_int_t u_compstride, 63868539c2SNatalie Beams double *dV, magma_int_t v_elemstride, 64868539c2SNatalie Beams magma_int_t v_compstride, 65868539c2SNatalie Beams magma_int_t nelem); 667f5b9731SStan Tomov 673513a710Sjeremylt void magmablas_dbasis_apply_batched_eval_grad(magma_int_t P, magma_int_t Q, 68868539c2SNatalie Beams magma_int_t dim, magma_int_t ncomp, 69868539c2SNatalie Beams magma_int_t nqpt, const double* dinterp1d, 70868539c2SNatalie Beams const double *dgrad1d, CeedTransposeMode tmode, 71868539c2SNatalie Beams const double *dU, magma_int_t u_elemstride, 72868539c2SNatalie Beams magma_int_t u_compstride, magma_int_t u_dimstride, 73868539c2SNatalie Beams double *dV, magma_int_t v_elemstride, 74868539c2SNatalie Beams magma_int_t v_compstride, magma_int_t v_dimstride, 75868539c2SNatalie Beams magma_int_t dim_id, magma_int_t nelem); 767f5b9731SStan Tomov 77868539c2SNatalie Beams void magmablas_dbasis_apply_batched_eval_weight(magma_int_t Q, magma_int_t dim, 78868539c2SNatalie Beams const double *dqweight1d, double *dV, 79868539c2SNatalie Beams magma_int_t v_elemstride, 80868539c2SNatalie Beams magma_int_t nelem); 81868539c2SNatalie Beams 82868539c2SNatalie Beams void magma_weight(magma_int_t grid, magma_int_t threads, magma_int_t nelem, 83868539c2SNatalie Beams magma_int_t Q, 84868539c2SNatalie Beams double *dqweight, double *dv); 85868539c2SNatalie Beams 868dc8d968Sjeremylt void magma_readDofsOffset(const magma_int_t NCOMP, 878dc8d968Sjeremylt const magma_int_t compstride, 888dc8d968Sjeremylt const magma_int_t esize, const magma_int_t nelem, 89d655899aSNatalie Beams magma_int_t *offsets, const double *du, double *dv); 90868539c2SNatalie Beams 918dc8d968Sjeremylt void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, 92266dd7abSnbeams const magma_int_t nelem, magma_int_t *strides, 93266dd7abSnbeams const double *du, double *dv); 94266dd7abSnbeams 958dc8d968Sjeremylt void 
magma_writeDofsOffset(const magma_int_t NCOMP, 968dc8d968Sjeremylt const magma_int_t compstride, 978dc8d968Sjeremylt const magma_int_t esize, const magma_int_t nelem, 98d655899aSNatalie Beams magma_int_t *offsets,const double *du, double *dv); 99868539c2SNatalie Beams 1008dc8d968Sjeremylt void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, 101266dd7abSnbeams const magma_int_t nelem, magma_int_t *strides, 102266dd7abSnbeams const double *du, double *dv); 103266dd7abSnbeams 1047f5b9731SStan Tomov magma_int_t 1057f5b9731SStan Tomov magma_isdevptr(const void *A); 1067f5b9731SStan Tomov 107868539c2SNatalie Beams int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, 108868539c2SNatalie Beams CeedInt Q1d, 109868539c2SNatalie Beams const CeedScalar *interp1d, 110868539c2SNatalie Beams const CeedScalar *grad1d, 111868539c2SNatalie Beams const CeedScalar *qref1d, 112868539c2SNatalie Beams const CeedScalar *qweight1d, 113868539c2SNatalie Beams CeedBasis basis); 1147f5b9731SStan Tomov 115868539c2SNatalie Beams int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, 116d4f68153Sjeremylt CeedInt ndof, CeedInt nqpts, 117d4f68153Sjeremylt const CeedScalar *interp, 118d4f68153Sjeremylt const CeedScalar *grad, 119d4f68153Sjeremylt const CeedScalar *qref, 120d4f68153Sjeremylt const CeedScalar *qweight, 121d4f68153Sjeremylt CeedBasis basis); 122868539c2SNatalie Beams 123868539c2SNatalie Beams int CeedElemRestrictionCreate_Magma(CeedMemType mtype, 124868539c2SNatalie Beams CeedCopyMode cmode, 125d655899aSNatalie Beams const CeedInt *offsets, 126868539c2SNatalie Beams CeedElemRestriction r); 127868539c2SNatalie Beams 128868539c2SNatalie Beams int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype, 129868539c2SNatalie Beams const CeedCopyMode cmode, 130d655899aSNatalie Beams const CeedInt *offsets, 131868539c2SNatalie Beams const CeedElemRestriction res); 132*a8c028e3SNatalie Beams 133*a8c028e3SNatalie Beams int 
CeedOperatorCreate_Magma(CeedOperator op); 134*a8c028e3SNatalie Beams 1357f5b9731SStan Tomov #ifdef __cplusplus 1367f5b9731SStan Tomov } 1377f5b9731SStan Tomov #endif 1387f5b9731SStan Tomov 139f68f2f01STzanio #define CeedDebug(...) 1407f5b9731SStan Tomov //#define CeedDebug(format, ...) fprintf(stderr, format, ## __VA_ARGS__) 1417f5b9731SStan Tomov 1427f5b9731SStan Tomov // comment the line below to use the default magma_is_devptr function 1437f5b9731SStan Tomov #define magma_is_devptr magma_isdevptr 1447f5b9731SStan Tomov 1457f5b9731SStan Tomov // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value> 1467f5b9731SStan Tomov #ifndef MAGMA_BATCH_STRIDE 1477f5b9731SStan Tomov #define MAGMA_BATCH_STRIDE (1000) 1487f5b9731SStan Tomov #endif 149