// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
167f5b9731SStan Tomov 177f5b9731SStan Tomov #include "ceed-magma.h" 187f5b9731SStan Tomov 197f5b9731SStan Tomov #ifdef __cplusplus 207f5b9731SStan Tomov CEED_INTERN "C" 217f5b9731SStan Tomov #endif 227f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, 237f5b9731SStan Tomov CeedTransposeMode tmode, CeedEvalMode emode, 243513a710Sjeremylt CeedVector U, CeedVector V) { 257f5b9731SStan Tomov int ierr; 267f5b9731SStan Tomov Ceed ceed; 277f5b9731SStan Tomov ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 287f5b9731SStan Tomov CeedInt dim, ncomp, ndof, nqpt; 297f5b9731SStan Tomov ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 307f5b9731SStan Tomov ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr); 317f5b9731SStan Tomov ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr); 327f5b9731SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr); 337f5b9731SStan Tomov const CeedScalar *u; 347f5b9731SStan Tomov CeedScalar *v; 35*868539c2SNatalie Beams if (emode != CEED_EVAL_WEIGHT) { 367f5b9731SStan Tomov ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChk(ierr); 377f5b9731SStan Tomov } else if (emode != CEED_EVAL_WEIGHT) { 387f5b9731SStan Tomov // LCOV_EXCL_START 397f5b9731SStan Tomov return CeedError(ceed, 1, 407f5b9731SStan Tomov "An input vector is required for this CeedEvalMode"); 417f5b9731SStan Tomov // LCOV_EXCL_STOP 427f5b9731SStan Tomov } 437f5b9731SStan Tomov ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChk(ierr); 447f5b9731SStan Tomov 457f5b9731SStan Tomov CeedBasis_Magma *impl; 467f5b9731SStan Tomov ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr); 477f5b9731SStan Tomov 487f5b9731SStan Tomov CeedInt P1d, Q1d; 497f5b9731SStan Tomov ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr); 507f5b9731SStan Tomov ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr); 517f5b9731SStan Tomov 527f5b9731SStan Tomov 
CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d", 537f5b9731SStan Tomov ncomp*CeedIntPow(P1d, dim), ncomp); 547f5b9731SStan Tomov 557f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 567f5b9731SStan Tomov CeedInt length; 577f5b9731SStan Tomov ierr = CeedVectorGetLength(V, &length); 587f5b9731SStan Tomov magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length); 597f5b9731SStan Tomov } 603513a710Sjeremylt switch (emode) { 613513a710Sjeremylt case CEED_EVAL_INTERP: { 627f5b9731SStan Tomov CeedInt P = P1d, Q = Q1d; 637f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 647f5b9731SStan Tomov P = Q1d; Q = P1d; 657f5b9731SStan Tomov } 667f5b9731SStan Tomov 677f5b9731SStan Tomov // Define element sizes for dofs/quad 687f5b9731SStan Tomov CeedInt elquadsize = CeedIntPow(Q1d, dim); 697f5b9731SStan Tomov CeedInt eldofssize = CeedIntPow(P1d, dim); 707f5b9731SStan Tomov 717f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 72*868539c2SNatalie Beams // component component 73*868539c2SNatalie Beams // elem elem 747f5b9731SStan Tomov // node node 757f5b9731SStan Tomov 767f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 777f5b9731SStan Tomov // Input (u) is E-vector, output (v) is Q-vector 787f5b9731SStan Tomov 797f5b9731SStan Tomov // Element strides 80*868539c2SNatalie Beams CeedInt u_elstride = eldofssize; 817f5b9731SStan Tomov CeedInt v_elstride = elquadsize; 827f5b9731SStan Tomov // Component strides 83*868539c2SNatalie Beams CeedInt u_compstride = nelem * eldofssize; 847f5b9731SStan Tomov CeedInt v_compstride = nelem * elquadsize; 857f5b9731SStan Tomov 867f5b9731SStan Tomov // --- Swap strides for TRANSPOSE mode: --- 877f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 887f5b9731SStan Tomov // Input (u) is Q-vector, output (v) is E-vector 897f5b9731SStan Tomov // Element strides 90*868539c2SNatalie Beams v_elstride = eldofssize; 917f5b9731SStan Tomov u_elstride = elquadsize; 927f5b9731SStan Tomov // Component strides 
93*868539c2SNatalie Beams v_compstride = nelem * eldofssize; 947f5b9731SStan Tomov u_compstride = nelem * elquadsize; 957f5b9731SStan Tomov } 967f5b9731SStan Tomov 977f5b9731SStan Tomov // Loop through components and apply batch over elements 987f5b9731SStan Tomov magmablas_dbasis_apply_batched_eval_interp(P, Q, dim, ncomp, 997f5b9731SStan Tomov impl->dinterp1d, tmode, 100*868539c2SNatalie Beams u, u_elstride, u_compstride, 101*868539c2SNatalie Beams v, v_elstride, v_compstride, 1027f5b9731SStan Tomov nelem); 1037f5b9731SStan Tomov } 1043513a710Sjeremylt break; 1053513a710Sjeremylt case CEED_EVAL_GRAD: { 1067f5b9731SStan Tomov CeedInt P = P1d, Q = Q1d; 1077f5b9731SStan Tomov // In CEED_NOTRANSPOSE mode: 1087f5b9731SStan Tomov // u is (P^dim x nc), column-major layout (nc = ncomp) 1097f5b9731SStan Tomov // v is (Q^dim x nc x dim), column-major layout (nc = ncomp) 1107f5b9731SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 1117f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1127f5b9731SStan Tomov P = Q1d, Q = P1d; 1137f5b9731SStan Tomov } 1147f5b9731SStan Tomov 1157f5b9731SStan Tomov // Define element sizes for dofs/quad 1167f5b9731SStan Tomov CeedInt elquadsize = CeedIntPow(Q1d, dim); 1177f5b9731SStan Tomov CeedInt eldofssize = CeedIntPow(P1d, dim); 1187f5b9731SStan Tomov 1197f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 1207f5b9731SStan Tomov // dim 121*868539c2SNatalie Beams // component component 122*868539c2SNatalie Beams // elem elem 1237f5b9731SStan Tomov // node node 1247f5b9731SStan Tomov 1257f5b9731SStan Tomov 1267f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 1277f5b9731SStan Tomov // Input (u) is E-vector, output (v) is Q-vector 1287f5b9731SStan Tomov 1297f5b9731SStan Tomov // Element strides 130*868539c2SNatalie Beams CeedInt u_elstride = eldofssize; 1317f5b9731SStan Tomov CeedInt v_elstride = elquadsize; 1327f5b9731SStan Tomov // Component strides 133*868539c2SNatalie Beams 
CeedInt u_compstride = nelem * eldofssize; 1347f5b9731SStan Tomov CeedInt v_compstride = nelem * elquadsize; 1357f5b9731SStan Tomov // Dimension strides 1367f5b9731SStan Tomov CeedInt u_dimstride = 0; 1377f5b9731SStan Tomov CeedInt v_dimstride = nelem * elquadsize * ncomp; 1387f5b9731SStan Tomov 1397f5b9731SStan Tomov // --- Swap strides for TRANSPOSE mode: --- 1407f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1417f5b9731SStan Tomov // Input (u) is Q-vector, output (v) is E-vector 1427f5b9731SStan Tomov // Element strides 143*868539c2SNatalie Beams v_elstride = eldofssize; 1447f5b9731SStan Tomov u_elstride = elquadsize; 1457f5b9731SStan Tomov // Component strides 146*868539c2SNatalie Beams v_compstride = nelem * eldofssize; 1477f5b9731SStan Tomov u_compstride = nelem * elquadsize; 1487f5b9731SStan Tomov // Dimension strides 1497f5b9731SStan Tomov v_dimstride = 0; 1507f5b9731SStan Tomov u_dimstride = nelem * elquadsize * ncomp; 1517f5b9731SStan Tomov 1527f5b9731SStan Tomov } 1537f5b9731SStan Tomov 154*868539c2SNatalie Beams // Loop through grad dimensions only, batch call over elements and components 1553513a710Sjeremylt for (CeedInt dim_ctr = 0; dim_ctr < dim; dim_ctr++) 1567f5b9731SStan Tomov magmablas_dbasis_apply_batched_eval_grad(P, Q, dim, ncomp, nqpt, 1577f5b9731SStan Tomov impl->dinterp1d, impl->dgrad1d, tmode, 158*868539c2SNatalie Beams u + dim_ctr * u_dimstride, u_elstride, u_compstride, u_dimstride, 159*868539c2SNatalie Beams v + dim_ctr * v_dimstride, v_elstride, v_compstride, v_dimstride, 160*868539c2SNatalie Beams dim_ctr, nelem); 1617f5b9731SStan Tomov } 1623513a710Sjeremylt break; 1633513a710Sjeremylt case CEED_EVAL_WEIGHT: { 1647f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) 1657f5b9731SStan Tomov // LCOV_EXCL_START 1667f5b9731SStan Tomov return CeedError(ceed, 1, 1677f5b9731SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 1687f5b9731SStan Tomov // LCOV_EXCL_STOP 1697f5b9731SStan Tomov CeedInt Q = Q1d; 1707f5b9731SStan Tomov 
int eldofssize = CeedIntPow(Q, dim); 1717f5b9731SStan Tomov magmablas_dbasis_apply_batched_eval_weight(Q, dim, impl->dqweight1d, 1727f5b9731SStan Tomov v, eldofssize, 1737f5b9731SStan Tomov nelem); 1747f5b9731SStan Tomov } 1753513a710Sjeremylt break; 1763513a710Sjeremylt // LCOV_EXCL_START 1773513a710Sjeremylt case CEED_EVAL_DIV: 1783513a710Sjeremylt return CeedError(ceed, 1, "CEED_EVAL_DIV not supported"); 1793513a710Sjeremylt case CEED_EVAL_CURL: 1803513a710Sjeremylt return CeedError(ceed, 1, "CEED_EVAL_CURL not supported"); 1813513a710Sjeremylt case CEED_EVAL_NONE: 1823513a710Sjeremylt return CeedError(ceed, 1, 1833513a710Sjeremylt "CEED_EVAL_NONE does not make sense in this context"); 1843513a710Sjeremylt // LCOV_EXCL_STOP 1853513a710Sjeremylt } 1867f5b9731SStan Tomov 1877f5b9731SStan Tomov if (emode!=CEED_EVAL_WEIGHT) { 1887f5b9731SStan Tomov ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr); 1897f5b9731SStan Tomov } 1907f5b9731SStan Tomov ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr); 1917f5b9731SStan Tomov return 0; 1927f5b9731SStan Tomov } 1937f5b9731SStan Tomov 1947f5b9731SStan Tomov #ifdef __cplusplus 1957f5b9731SStan Tomov CEED_INTERN "C" 1967f5b9731SStan Tomov #endif 197*868539c2SNatalie Beams int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt nelem, 198*868539c2SNatalie Beams CeedTransposeMode tmode, CeedEvalMode emode, 199*868539c2SNatalie Beams CeedVector U, CeedVector V) { 200*868539c2SNatalie Beams int ierr; 201*868539c2SNatalie Beams Ceed ceed; 202*868539c2SNatalie Beams ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 203*868539c2SNatalie Beams CeedInt dim, ncomp, ndof, nqpt; 204*868539c2SNatalie Beams ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 205*868539c2SNatalie Beams ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr); 206*868539c2SNatalie Beams ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr); 207*868539c2SNatalie Beams ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); 
CeedChk(ierr); 208*868539c2SNatalie Beams const CeedScalar *du; 209*868539c2SNatalie Beams CeedScalar *dv; 210*868539c2SNatalie Beams if (emode != CEED_EVAL_WEIGHT) { 211*868539c2SNatalie Beams ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChk(ierr); 212*868539c2SNatalie Beams } else if (emode != CEED_EVAL_WEIGHT) { 213*868539c2SNatalie Beams // LCOV_EXCL_START 214*868539c2SNatalie Beams return CeedError(ceed, 1, 215*868539c2SNatalie Beams "An input vector is required for this CeedEvalMode"); 216*868539c2SNatalie Beams // LCOV_EXCL_STOP 217*868539c2SNatalie Beams } 218*868539c2SNatalie Beams ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChk(ierr); 219*868539c2SNatalie Beams 220*868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 221*868539c2SNatalie Beams ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr); 222*868539c2SNatalie Beams 223*868539c2SNatalie Beams CeedDebug("\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d", 224*868539c2SNatalie Beams ncomp*ndof, ncomp); 225*868539c2SNatalie Beams 226*868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) { 227*868539c2SNatalie Beams CeedInt length; 228*868539c2SNatalie Beams ierr = CeedVectorGetLength(V, &length); 229*868539c2SNatalie Beams magmablas_dlaset(MagmaFull, length, 1, 0., 0., dv, length); 230*868539c2SNatalie Beams } 231*868539c2SNatalie Beams switch (emode) { 232*868539c2SNatalie Beams case CEED_EVAL_INTERP: { 233*868539c2SNatalie Beams CeedInt P = ndof, Q = nqpt; 234*868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) 235*868539c2SNatalie Beams magma_dgemm(MagmaNoTrans, MagmaNoTrans, 236*868539c2SNatalie Beams P, nelem*ncomp, Q, 237*868539c2SNatalie Beams 1.0, impl->dinterp, P, 238*868539c2SNatalie Beams du, Q, 239*868539c2SNatalie Beams 0.0, dv, P); 240*868539c2SNatalie Beams else 241*868539c2SNatalie Beams magma_dgemm(MagmaTrans, MagmaNoTrans, 242*868539c2SNatalie Beams Q, nelem*ncomp, P, 243*868539c2SNatalie Beams 1.0, impl->dinterp, P, 244*868539c2SNatalie 
Beams du, P, 245*868539c2SNatalie Beams 0.0, dv, Q); 246*868539c2SNatalie Beams } 247*868539c2SNatalie Beams break; 248*868539c2SNatalie Beams 249*868539c2SNatalie Beams case CEED_EVAL_GRAD: { 250*868539c2SNatalie Beams CeedInt P = ndof, Q = nqpt; 251*868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) { 252*868539c2SNatalie Beams double beta = 0.0; 253*868539c2SNatalie Beams for(int d=0; d<dim; d++) { 254*868539c2SNatalie Beams if (d>0) 255*868539c2SNatalie Beams beta = 1.0; 256*868539c2SNatalie Beams magma_dgemm(MagmaNoTrans, MagmaNoTrans, 257*868539c2SNatalie Beams P, nelem*ncomp, Q, 258*868539c2SNatalie Beams 1.0, impl->dgrad + d*P*Q, P, 259*868539c2SNatalie Beams du + d*nelem*ncomp*Q, Q, 260*868539c2SNatalie Beams beta, dv, P); 261*868539c2SNatalie Beams } 262*868539c2SNatalie Beams } else { 263*868539c2SNatalie Beams for(int d=0; d< dim; d++) 264*868539c2SNatalie Beams magma_dgemm(MagmaTrans, MagmaNoTrans, 265*868539c2SNatalie Beams Q, nelem*ncomp, P, 266*868539c2SNatalie Beams 1.0, impl->dgrad + d*P*Q, P, 267*868539c2SNatalie Beams du, P, 268*868539c2SNatalie Beams 0.0, dv + d*nelem*ncomp*Q, Q); 269*868539c2SNatalie Beams } 270*868539c2SNatalie Beams } 271*868539c2SNatalie Beams break; 272*868539c2SNatalie Beams 273*868539c2SNatalie Beams case CEED_EVAL_WEIGHT: { 274*868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) 275*868539c2SNatalie Beams // LCOV_EXCL_START 276*868539c2SNatalie Beams return CeedError(ceed, 1, 277*868539c2SNatalie Beams "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 278*868539c2SNatalie Beams // LCOV_EXCL_STOP 279*868539c2SNatalie Beams 280*868539c2SNatalie Beams int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1; 281*868539c2SNatalie Beams int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)? 
282*868539c2SNatalie Beams 1 : 0 ); 283*868539c2SNatalie Beams magma_weight(grid, nqpt, nelem, nqpt, impl->dqweight, dv); 284*868539c2SNatalie Beams CeedChk(ierr); 285*868539c2SNatalie Beams } 286*868539c2SNatalie Beams break; 287*868539c2SNatalie Beams 288*868539c2SNatalie Beams // LCOV_EXCL_START 289*868539c2SNatalie Beams case CEED_EVAL_DIV: 290*868539c2SNatalie Beams return CeedError(ceed, 1, "CEED_EVAL_DIV not supported"); 291*868539c2SNatalie Beams case CEED_EVAL_CURL: 292*868539c2SNatalie Beams return CeedError(ceed, 1, "CEED_EVAL_CURL not supported"); 293*868539c2SNatalie Beams case CEED_EVAL_NONE: 294*868539c2SNatalie Beams return CeedError(ceed, 1, 295*868539c2SNatalie Beams "CEED_EVAL_NONE does not make sense in this context"); 296*868539c2SNatalie Beams // LCOV_EXCL_STOP 297*868539c2SNatalie Beams } 298*868539c2SNatalie Beams 299*868539c2SNatalie Beams if(emode!=CEED_EVAL_WEIGHT) { 300*868539c2SNatalie Beams ierr = CeedVectorRestoreArrayRead(U, &du); CeedChk(ierr); 301*868539c2SNatalie Beams } 302*868539c2SNatalie Beams ierr = CeedVectorRestoreArray(V, &dv); CeedChk(ierr); 303*868539c2SNatalie Beams return 0; 304*868539c2SNatalie Beams } 305*868539c2SNatalie Beams 306*868539c2SNatalie Beams #ifdef __cplusplus 307*868539c2SNatalie Beams CEED_INTERN "C" 308*868539c2SNatalie Beams #endif 3093513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) { 3107f5b9731SStan Tomov int ierr; 3117f5b9731SStan Tomov CeedBasis_Magma *impl; 3127f5b9731SStan Tomov ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr); 3137f5b9731SStan Tomov 3147f5b9731SStan Tomov ierr = magma_free(impl->dqref1d); CeedChk(ierr); 3157f5b9731SStan Tomov ierr = magma_free(impl->dinterp1d); CeedChk(ierr); 3167f5b9731SStan Tomov ierr = magma_free(impl->dgrad1d); CeedChk(ierr); 3177f5b9731SStan Tomov ierr = magma_free(impl->dqweight1d); CeedChk(ierr); 3187f5b9731SStan Tomov 3197f5b9731SStan Tomov ierr = CeedFree(&impl); CeedChk(ierr); 3207f5b9731SStan Tomov 3217f5b9731SStan Tomov 
return 0; 3227f5b9731SStan Tomov } 3237f5b9731SStan Tomov 3247f5b9731SStan Tomov #ifdef __cplusplus 3257f5b9731SStan Tomov CEED_INTERN "C" 3267f5b9731SStan Tomov #endif 327*868539c2SNatalie Beams int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) { 328*868539c2SNatalie Beams int ierr; 329*868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 330*868539c2SNatalie Beams ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr); 331*868539c2SNatalie Beams 332*868539c2SNatalie Beams ierr = magma_free(impl->dqref); CeedChk(ierr); 333*868539c2SNatalie Beams ierr = magma_free(impl->dinterp); CeedChk(ierr); 334*868539c2SNatalie Beams ierr = magma_free(impl->dgrad); CeedChk(ierr); 335*868539c2SNatalie Beams ierr = magma_free(impl->dqweight); CeedChk(ierr); 336*868539c2SNatalie Beams 337*868539c2SNatalie Beams ierr = CeedFree(&impl); CeedChk(ierr); 338*868539c2SNatalie Beams 339*868539c2SNatalie Beams return 0; 340*868539c2SNatalie Beams } 341*868539c2SNatalie Beams 342*868539c2SNatalie Beams #ifdef __cplusplus 343*868539c2SNatalie Beams CEED_INTERN "C" 344*868539c2SNatalie Beams #endif 3453513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, 3463513a710Sjeremylt const CeedScalar *interp1d, 3477f5b9731SStan Tomov const CeedScalar *grad1d, 3487f5b9731SStan Tomov const CeedScalar *qref1d, 3493513a710Sjeremylt const CeedScalar *qweight1d, CeedBasis basis) { 3507f5b9731SStan Tomov int ierr; 3517f5b9731SStan Tomov CeedBasis_Magma *impl; 3527f5b9731SStan Tomov Ceed ceed; 3537f5b9731SStan Tomov ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 3547f5b9731SStan Tomov 3557f5b9731SStan Tomov ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 3567f5b9731SStan Tomov CeedBasisApply_Magma); CeedChk(ierr); 3577f5b9731SStan Tomov ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 3587f5b9731SStan Tomov CeedBasisDestroy_Magma); CeedChk(ierr); 3597f5b9731SStan Tomov 3607f5b9731SStan Tomov ierr = CeedCalloc(1,&impl); 
CeedChk(ierr); 3617f5b9731SStan Tomov ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr); 3627f5b9731SStan Tomov 3637f5b9731SStan Tomov // Copy qref1d to the GPU 3647f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0])); 3657f5b9731SStan Tomov CeedChk(ierr); 3667f5b9731SStan Tomov magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1); 3677f5b9731SStan Tomov 3687f5b9731SStan Tomov // Copy interp1d to the GPU 3697f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0])); 3707f5b9731SStan Tomov CeedChk(ierr); 3717f5b9731SStan Tomov magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1); 3727f5b9731SStan Tomov 3737f5b9731SStan Tomov // Copy grad1d to the GPU 3747f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0])); 3757f5b9731SStan Tomov CeedChk(ierr); 3767f5b9731SStan Tomov magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1); 3777f5b9731SStan Tomov 3787f5b9731SStan Tomov // Copy qweight1d to the GPU 3797f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0])); 3807f5b9731SStan Tomov CeedChk(ierr); 3817f5b9731SStan Tomov magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1); 3827f5b9731SStan Tomov 3837f5b9731SStan Tomov return 0; 3847f5b9731SStan Tomov } 3857f5b9731SStan Tomov 3867f5b9731SStan Tomov #ifdef __cplusplus 3877f5b9731SStan Tomov CEED_INTERN "C" 3887f5b9731SStan Tomov #endif 3893513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof, 3903513a710Sjeremylt CeedInt nqpts, const CeedScalar *interp, 3913513a710Sjeremylt const CeedScalar *grad, const CeedScalar *qref, 3923513a710Sjeremylt const CeedScalar *qweight, CeedBasis basis) { 3937f5b9731SStan Tomov int ierr; 394*868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 3957f5b9731SStan Tomov Ceed ceed; 3967f5b9731SStan Tomov ierr = 
CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 3977f5b9731SStan Tomov 398*868539c2SNatalie Beams ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 399*868539c2SNatalie Beams CeedBasisApplyNonTensor_Magma); CeedChk(ierr); 400*868539c2SNatalie Beams ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 401*868539c2SNatalie Beams CeedBasisDestroyNonTensor_Magma); CeedChk(ierr); 402*868539c2SNatalie Beams 403*868539c2SNatalie Beams ierr = CeedCalloc(1,&impl); CeedChk(ierr); 404*868539c2SNatalie Beams ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr); 405*868539c2SNatalie Beams 406*868539c2SNatalie Beams // Copy qref to the GPU 407*868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dqref, nqpts*sizeof(qref[0])); 408*868539c2SNatalie Beams CeedChk(ierr); 409*868539c2SNatalie Beams magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1); 410*868539c2SNatalie Beams 411*868539c2SNatalie Beams // Copy interp to the GPU 412*868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0])); 413*868539c2SNatalie Beams CeedChk(ierr); 414*868539c2SNatalie Beams magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1); 415*868539c2SNatalie Beams 416*868539c2SNatalie Beams // Copy grad to the GPU 417*868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0])); 418*868539c2SNatalie Beams CeedChk(ierr); 419*868539c2SNatalie Beams magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), grad, 1, impl->dgrad, 1); 420*868539c2SNatalie Beams 421*868539c2SNatalie Beams // Copy qweight to the GPU 422*868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0])); 423*868539c2SNatalie Beams CeedChk(ierr); 424*868539c2SNatalie Beams magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1); 425*868539c2SNatalie Beams 426*868539c2SNatalie Beams return 0; 4277f5b9731SStan Tomov } 428