17f5b9731SStan Tomov // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 27f5b9731SStan Tomov // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 37f5b9731SStan Tomov // All Rights reserved. See files LICENSE and NOTICE for details. 47f5b9731SStan Tomov // 57f5b9731SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software 67f5b9731SStan Tomov // libraries and APIs for efficient high-order finite element and spectral 77f5b9731SStan Tomov // element discretizations for exascale applications. For more information and 87f5b9731SStan Tomov // source code availability see http://github.com/ceed. 97f5b9731SStan Tomov // 107f5b9731SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 117f5b9731SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office 127f5b9731SStan Tomov // of Science and the National Nuclear Security Administration) responsible for 137f5b9731SStan Tomov // the planning and preparation of a capable exascale ecosystem, including 147f5b9731SStan Tomov // software, applications, hardware, advanced system engineering and early 157f5b9731SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative. 167f5b9731SStan Tomov 177f5b9731SStan Tomov #include "ceed-magma.h" 187f5b9731SStan Tomov 197f5b9731SStan Tomov #ifdef __cplusplus 207f5b9731SStan Tomov CEED_INTERN "C" 217f5b9731SStan Tomov #endif 227f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, 237f5b9731SStan Tomov CeedTransposeMode tmode, CeedEvalMode emode, 24*3513a710Sjeremylt CeedVector U, CeedVector V) { 257f5b9731SStan Tomov int ierr; 267f5b9731SStan Tomov Ceed ceed; 277f5b9731SStan Tomov ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 287f5b9731SStan Tomov CeedInt dim, ncomp, ndof, nqpt; 297f5b9731SStan Tomov ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 307f5b9731SStan Tomov ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr); 317f5b9731SStan Tomov ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr); 327f5b9731SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr); 337f5b9731SStan Tomov const CeedScalar *u; 347f5b9731SStan Tomov CeedScalar *v; 357f5b9731SStan Tomov if (U) { 367f5b9731SStan Tomov ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChk(ierr); 377f5b9731SStan Tomov } else if (emode != CEED_EVAL_WEIGHT) { 387f5b9731SStan Tomov // LCOV_EXCL_START 397f5b9731SStan Tomov return CeedError(ceed, 1, 407f5b9731SStan Tomov "An input vector is required for this CeedEvalMode"); 417f5b9731SStan Tomov // LCOV_EXCL_STOP 427f5b9731SStan Tomov } 437f5b9731SStan Tomov ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChk(ierr); 447f5b9731SStan Tomov 457f5b9731SStan Tomov CeedBasis_Magma *impl; 467f5b9731SStan Tomov ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr); 477f5b9731SStan Tomov 487f5b9731SStan Tomov CeedInt P1d, Q1d; 497f5b9731SStan Tomov ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr); 507f5b9731SStan Tomov ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr); 517f5b9731SStan Tomov 527f5b9731SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d", 537f5b9731SStan Tomov ncomp*CeedIntPow(P1d, dim), ncomp); 547f5b9731SStan Tomov 557f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 567f5b9731SStan Tomov CeedInt length; 577f5b9731SStan Tomov ierr = CeedVectorGetLength(V, &length); 587f5b9731SStan Tomov magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length); 597f5b9731SStan Tomov } 60*3513a710Sjeremylt switch (emode) { 61*3513a710Sjeremylt case CEED_EVAL_INTERP: { 627f5b9731SStan Tomov CeedInt P = P1d, Q = Q1d; 637f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 647f5b9731SStan Tomov P = Q1d; Q = P1d; 657f5b9731SStan Tomov } 667f5b9731SStan Tomov 677f5b9731SStan Tomov // Define element sizes for dofs/quad 687f5b9731SStan Tomov CeedInt elquadsize = CeedIntPow(Q1d, dim); 697f5b9731SStan Tomov CeedInt eldofssize = CeedIntPow(P1d, dim); 707f5b9731SStan Tomov 717f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 727f5b9731SStan Tomov // elem component 737f5b9731SStan Tomov // component elem 747f5b9731SStan Tomov // node node 757f5b9731SStan Tomov 767f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 777f5b9731SStan Tomov // Input (u) is E-vector, output (v) is Q-vector 787f5b9731SStan Tomov 797f5b9731SStan Tomov // Element strides 807f5b9731SStan Tomov CeedInt u_elstride = ncomp * eldofssize; 817f5b9731SStan Tomov CeedInt v_elstride = elquadsize; 827f5b9731SStan Tomov // Component strides 837f5b9731SStan Tomov CeedInt u_compstride = eldofssize; 847f5b9731SStan Tomov CeedInt v_compstride = nelem * elquadsize; 857f5b9731SStan Tomov 867f5b9731SStan Tomov // --- Swap strides for TRANSPOSE mode: --- 877f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 887f5b9731SStan Tomov // Input (u) is Q-vector, output (v) is E-vector 897f5b9731SStan Tomov // Element strides 907f5b9731SStan Tomov v_elstride = ncomp * eldofssize; 917f5b9731SStan Tomov u_elstride = elquadsize; 927f5b9731SStan Tomov // Component strides 937f5b9731SStan Tomov v_compstride = eldofssize; 947f5b9731SStan Tomov u_compstride = nelem * elquadsize; 957f5b9731SStan Tomov } 967f5b9731SStan Tomov 977f5b9731SStan Tomov // Loop through components and apply batch over elements 98*3513a710Sjeremylt for (CeedInt comp_ctr = 0; comp_ctr < ncomp; comp_ctr++) 997f5b9731SStan Tomov magmablas_dbasis_apply_batched_eval_interp(P, Q, dim, ncomp, 1007f5b9731SStan Tomov impl->dinterp1d, tmode, 1017f5b9731SStan Tomov u + u_compstride * comp_ctr, u_elstride, 1027f5b9731SStan Tomov v + v_compstride * comp_ctr, v_elstride, 1037f5b9731SStan Tomov nelem); 1047f5b9731SStan Tomov } 105*3513a710Sjeremylt break; 106*3513a710Sjeremylt case CEED_EVAL_GRAD: { 1077f5b9731SStan Tomov CeedInt P = P1d, Q = Q1d; 1087f5b9731SStan Tomov // In CEED_NOTRANSPOSE mode: 1097f5b9731SStan Tomov // u is (P^dim x nc), column-major layout (nc = ncomp) 1107f5b9731SStan Tomov // v is (Q^dim x nc x dim), column-major layout (nc = ncomp) 1117f5b9731SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 1127f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1137f5b9731SStan Tomov P = Q1d, Q = P1d; 1147f5b9731SStan Tomov } 1157f5b9731SStan Tomov 1167f5b9731SStan Tomov // Define element sizes for dofs/quad 1177f5b9731SStan Tomov CeedInt elquadsize = CeedIntPow(Q1d, dim); 1187f5b9731SStan Tomov CeedInt eldofssize = CeedIntPow(P1d, dim); 1197f5b9731SStan Tomov 1207f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 1217f5b9731SStan Tomov // dim 1227f5b9731SStan Tomov // elem component 1237f5b9731SStan Tomov // component elem 1247f5b9731SStan Tomov // node node 1257f5b9731SStan Tomov 1267f5b9731SStan Tomov 1277f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 1287f5b9731SStan Tomov // Input (u) is E-vector, output (v) is Q-vector 1297f5b9731SStan Tomov 1307f5b9731SStan Tomov // Element strides 1317f5b9731SStan Tomov CeedInt u_elstride = ncomp * eldofssize; 1327f5b9731SStan Tomov CeedInt v_elstride = elquadsize; 1337f5b9731SStan Tomov // Component strides 1347f5b9731SStan Tomov CeedInt u_compstride = eldofssize; 1357f5b9731SStan Tomov CeedInt v_compstride = nelem * elquadsize; 1367f5b9731SStan Tomov // Dimension strides 1377f5b9731SStan Tomov CeedInt u_dimstride = 0; 1387f5b9731SStan Tomov CeedInt v_dimstride = nelem * elquadsize * ncomp; 1397f5b9731SStan Tomov 1407f5b9731SStan Tomov // --- Swap strides for TRANSPOSE mode: --- 1417f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1427f5b9731SStan Tomov // Input (u) is Q-vector, output (v) is E-vector 1437f5b9731SStan Tomov // Element strides 1447f5b9731SStan Tomov v_elstride = ncomp * eldofssize; 1457f5b9731SStan Tomov u_elstride = elquadsize; 1467f5b9731SStan Tomov // Component strides 1477f5b9731SStan Tomov v_compstride = eldofssize; 1487f5b9731SStan Tomov u_compstride = nelem * elquadsize; 1497f5b9731SStan Tomov // Dimension strides 1507f5b9731SStan Tomov v_dimstride = 0; 1517f5b9731SStan Tomov u_dimstride = nelem * elquadsize * ncomp; 1527f5b9731SStan Tomov 1537f5b9731SStan Tomov } 1547f5b9731SStan Tomov 1557f5b9731SStan Tomov // Loop through grad dimensions and components, batch call over elements 156*3513a710Sjeremylt for (CeedInt dim_ctr = 0; dim_ctr < dim; dim_ctr++) 157*3513a710Sjeremylt for (CeedInt comp_ctr = 0; comp_ctr < ncomp; comp_ctr++) 1587f5b9731SStan Tomov magmablas_dbasis_apply_batched_eval_grad(P, Q, dim, ncomp, nqpt, 1597f5b9731SStan Tomov impl->dinterp1d, impl->dgrad1d, tmode, 160*3513a710Sjeremylt u + dim_ctr * u_dimstride + u_compstride * comp_ctr, u_elstride, 1617f5b9731SStan Tomov v + dim_ctr * v_dimstride + v_compstride * comp_ctr, 162*3513a710Sjeremylt v_elstride, nelem, dim_ctr); 1637f5b9731SStan Tomov } 164*3513a710Sjeremylt break; 165*3513a710Sjeremylt case CEED_EVAL_WEIGHT: { 1667f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) 1677f5b9731SStan Tomov // LCOV_EXCL_START 1687f5b9731SStan Tomov return CeedError(ceed, 1, 1697f5b9731SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 1707f5b9731SStan Tomov // LCOV_EXCL_STOP 1717f5b9731SStan Tomov CeedInt Q = Q1d; 1727f5b9731SStan Tomov int eldofssize = CeedIntPow(Q, dim); 1737f5b9731SStan Tomov magmablas_dbasis_apply_batched_eval_weight(Q, dim, impl->dqweight1d, 1747f5b9731SStan Tomov v, eldofssize, 1757f5b9731SStan Tomov nelem); 1767f5b9731SStan Tomov } 177*3513a710Sjeremylt break; 178*3513a710Sjeremylt // LCOV_EXCL_START 179*3513a710Sjeremylt case CEED_EVAL_DIV: 180*3513a710Sjeremylt return CeedError(ceed, 1, "CEED_EVAL_DIV not supported"); 181*3513a710Sjeremylt case CEED_EVAL_CURL: 182*3513a710Sjeremylt return CeedError(ceed, 1, "CEED_EVAL_CURL not supported"); 183*3513a710Sjeremylt case CEED_EVAL_NONE: 184*3513a710Sjeremylt return CeedError(ceed, 1, 185*3513a710Sjeremylt "CEED_EVAL_NONE does not make sense in this context"); 186*3513a710Sjeremylt // LCOV_EXCL_STOP 187*3513a710Sjeremylt } 1887f5b9731SStan Tomov 1897f5b9731SStan Tomov if (emode!=CEED_EVAL_WEIGHT) { 1907f5b9731SStan Tomov ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr); 1917f5b9731SStan Tomov } 1927f5b9731SStan Tomov ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr); 1937f5b9731SStan Tomov return 0; 1947f5b9731SStan Tomov } 1957f5b9731SStan Tomov 1967f5b9731SStan Tomov #ifdef __cplusplus 1977f5b9731SStan Tomov CEED_INTERN "C" 1987f5b9731SStan Tomov #endif 199*3513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) { 2007f5b9731SStan Tomov int ierr; 2017f5b9731SStan Tomov CeedBasis_Magma *impl; 2027f5b9731SStan Tomov ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr); 2037f5b9731SStan Tomov 2047f5b9731SStan Tomov ierr = magma_free(impl->dqref1d); CeedChk(ierr); 2057f5b9731SStan Tomov ierr = magma_free(impl->dinterp1d); CeedChk(ierr); 2067f5b9731SStan Tomov ierr = magma_free(impl->dgrad1d); CeedChk(ierr); 2077f5b9731SStan Tomov ierr = magma_free(impl->dqweight1d); CeedChk(ierr); 2087f5b9731SStan Tomov 2097f5b9731SStan Tomov ierr = CeedFree(&impl); CeedChk(ierr); 2107f5b9731SStan Tomov 2117f5b9731SStan Tomov return 0; 2127f5b9731SStan Tomov } 2137f5b9731SStan Tomov 2147f5b9731SStan Tomov #ifdef __cplusplus 2157f5b9731SStan Tomov CEED_INTERN "C" 2167f5b9731SStan Tomov #endif 217*3513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, 218*3513a710Sjeremylt const CeedScalar *interp1d, 2197f5b9731SStan Tomov const CeedScalar *grad1d, 2207f5b9731SStan Tomov const CeedScalar *qref1d, 221*3513a710Sjeremylt const CeedScalar *qweight1d, CeedBasis basis) { 2227f5b9731SStan Tomov int ierr; 2237f5b9731SStan Tomov CeedBasis_Magma *impl; 2247f5b9731SStan Tomov Ceed ceed; 2257f5b9731SStan Tomov ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 2267f5b9731SStan Tomov 2277f5b9731SStan Tomov ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 2287f5b9731SStan Tomov CeedBasisApply_Magma); CeedChk(ierr); 2297f5b9731SStan Tomov ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 2307f5b9731SStan Tomov CeedBasisDestroy_Magma); CeedChk(ierr); 2317f5b9731SStan Tomov 2327f5b9731SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 2337f5b9731SStan Tomov ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr); 2347f5b9731SStan Tomov 2357f5b9731SStan Tomov // Copy qref1d to the GPU 2367f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0])); 2377f5b9731SStan Tomov CeedChk(ierr); 2387f5b9731SStan Tomov magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1); 2397f5b9731SStan Tomov 2407f5b9731SStan Tomov // Copy interp1d to the GPU 2417f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0])); 2427f5b9731SStan Tomov CeedChk(ierr); 2437f5b9731SStan Tomov magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1); 2447f5b9731SStan Tomov 2457f5b9731SStan Tomov // Copy grad1d to the GPU 2467f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0])); 2477f5b9731SStan Tomov CeedChk(ierr); 2487f5b9731SStan Tomov magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1); 2497f5b9731SStan Tomov 2507f5b9731SStan Tomov // Copy qweight1d to the GPU 2517f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0])); 2527f5b9731SStan Tomov CeedChk(ierr); 2537f5b9731SStan Tomov magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1); 2547f5b9731SStan Tomov 2557f5b9731SStan Tomov return 0; 2567f5b9731SStan Tomov } 2577f5b9731SStan Tomov 2587f5b9731SStan Tomov #ifdef __cplusplus 2597f5b9731SStan Tomov CEED_INTERN "C" 2607f5b9731SStan Tomov #endif 261*3513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof, 262*3513a710Sjeremylt CeedInt nqpts, const CeedScalar *interp, 263*3513a710Sjeremylt const CeedScalar *grad, const CeedScalar *qref, 264*3513a710Sjeremylt const CeedScalar *qweight, CeedBasis basis) { 2657f5b9731SStan Tomov int ierr; 2667f5b9731SStan Tomov Ceed ceed; 2677f5b9731SStan Tomov ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 2687f5b9731SStan Tomov 2697f5b9731SStan Tomov return CeedError(ceed, 1, "Backend does not implement non-tensor bases"); 2707f5b9731SStan Tomov } 271