// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
167f5b9731SStan Tomov 173d576824SJeremy L Thompson #include <ceed.h> 183d576824SJeremy L Thompson #include <ceed-backend.h> 197f5b9731SStan Tomov #include "ceed-magma.h" 207f5b9731SStan Tomov 217f5b9731SStan Tomov #ifdef __cplusplus 227f5b9731SStan Tomov CEED_INTERN "C" 237f5b9731SStan Tomov #endif 247f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, 257f5b9731SStan Tomov CeedTransposeMode tmode, CeedEvalMode emode, 263513a710Sjeremylt CeedVector U, CeedVector V) { 277f5b9731SStan Tomov int ierr; 287f5b9731SStan Tomov Ceed ceed; 29*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 30e0582403Sabdelfattah83 CeedInt dim, ncomp, ndof; 31*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 32*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 33*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr); 34e0582403Sabdelfattah83 35e0582403Sabdelfattah83 Ceed_Magma *data; 36*e15f9bd0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 37e0582403Sabdelfattah83 387f5b9731SStan Tomov const CeedScalar *u; 397f5b9731SStan Tomov CeedScalar *v; 40868539c2SNatalie Beams if (emode != CEED_EVAL_WEIGHT) { 41*e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChkBackend(ierr); 427f5b9731SStan Tomov } else if (emode != CEED_EVAL_WEIGHT) { 437f5b9731SStan Tomov // LCOV_EXCL_START 44*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 457f5b9731SStan Tomov "An input vector is required for this CeedEvalMode"); 467f5b9731SStan Tomov // LCOV_EXCL_STOP 477f5b9731SStan Tomov } 48*e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChkBackend(ierr); 497f5b9731SStan Tomov 507f5b9731SStan Tomov CeedBasis_Magma *impl; 51*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 
527f5b9731SStan Tomov 537f5b9731SStan Tomov CeedInt P1d, Q1d; 54*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChkBackend(ierr); 55*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChkBackend(ierr); 567f5b9731SStan Tomov 577f5b9731SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d", 587f5b9731SStan Tomov ncomp*CeedIntPow(P1d, dim), ncomp); 597f5b9731SStan Tomov 607f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 617f5b9731SStan Tomov CeedInt length; 62*e15f9bd0SJeremy L Thompson ierr = CeedVectorGetLength(V, &length); CeedChkBackend(ierr); 63e0582403Sabdelfattah83 magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length, data->queue); 64e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 657f5b9731SStan Tomov } 663513a710Sjeremylt switch (emode) { 673513a710Sjeremylt case CEED_EVAL_INTERP: { 687f5b9731SStan Tomov CeedInt P = P1d, Q = Q1d; 697f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 707f5b9731SStan Tomov P = Q1d; Q = P1d; 717f5b9731SStan Tomov } 727f5b9731SStan Tomov 737f5b9731SStan Tomov // Define element sizes for dofs/quad 747f5b9731SStan Tomov CeedInt elquadsize = CeedIntPow(Q1d, dim); 757f5b9731SStan Tomov CeedInt eldofssize = CeedIntPow(P1d, dim); 767f5b9731SStan Tomov 777f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 78868539c2SNatalie Beams // component component 79868539c2SNatalie Beams // elem elem 807f5b9731SStan Tomov // node node 817f5b9731SStan Tomov 827f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 837f5b9731SStan Tomov // Input (u) is E-vector, output (v) is Q-vector 847f5b9731SStan Tomov 857f5b9731SStan Tomov // Element strides 86868539c2SNatalie Beams CeedInt u_elstride = eldofssize; 877f5b9731SStan Tomov CeedInt v_elstride = elquadsize; 887f5b9731SStan Tomov // Component strides 89868539c2SNatalie Beams CeedInt u_compstride = nelem * eldofssize; 907f5b9731SStan Tomov CeedInt v_compstride = nelem 
* elquadsize; 917f5b9731SStan Tomov 927f5b9731SStan Tomov // --- Swap strides for TRANSPOSE mode: --- 937f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 947f5b9731SStan Tomov // Input (u) is Q-vector, output (v) is E-vector 957f5b9731SStan Tomov // Element strides 96868539c2SNatalie Beams v_elstride = eldofssize; 977f5b9731SStan Tomov u_elstride = elquadsize; 987f5b9731SStan Tomov // Component strides 99868539c2SNatalie Beams v_compstride = nelem * eldofssize; 1007f5b9731SStan Tomov u_compstride = nelem * elquadsize; 1017f5b9731SStan Tomov } 1027f5b9731SStan Tomov 103e0582403Sabdelfattah83 ierr = magma_interp(P, Q, dim, ncomp, 1047f5b9731SStan Tomov impl->dinterp1d, tmode, 105868539c2SNatalie Beams u, u_elstride, u_compstride, 106868539c2SNatalie Beams v, v_elstride, v_compstride, 107e0582403Sabdelfattah83 nelem, data->basis_kernel_mode, data->maxthreads, 108e0582403Sabdelfattah83 data->queue); 109*e15f9bd0SJeremy L Thompson if (ierr != 0) CeedError(ceed, CEED_ERROR_BACKEND, 110e0582403Sabdelfattah83 "MAGMA: launch failure detected for magma_interp"); 1117f5b9731SStan Tomov } 1123513a710Sjeremylt break; 1133513a710Sjeremylt case CEED_EVAL_GRAD: { 1147f5b9731SStan Tomov CeedInt P = P1d, Q = Q1d; 1157f5b9731SStan Tomov // In CEED_NOTRANSPOSE mode: 1167f5b9731SStan Tomov // u is (P^dim x nc), column-major layout (nc = ncomp) 1177f5b9731SStan Tomov // v is (Q^dim x nc x dim), column-major layout (nc = ncomp) 1187f5b9731SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 
1197f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1207f5b9731SStan Tomov P = Q1d, Q = P1d; 1217f5b9731SStan Tomov } 1227f5b9731SStan Tomov 1237f5b9731SStan Tomov // Define element sizes for dofs/quad 1247f5b9731SStan Tomov CeedInt elquadsize = CeedIntPow(Q1d, dim); 1257f5b9731SStan Tomov CeedInt eldofssize = CeedIntPow(P1d, dim); 1267f5b9731SStan Tomov 1277f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 1287f5b9731SStan Tomov // dim 129868539c2SNatalie Beams // component component 130868539c2SNatalie Beams // elem elem 1317f5b9731SStan Tomov // node node 1327f5b9731SStan Tomov 1337f5b9731SStan Tomov 1347f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 1357f5b9731SStan Tomov // Input (u) is E-vector, output (v) is Q-vector 1367f5b9731SStan Tomov 1377f5b9731SStan Tomov // Element strides 138868539c2SNatalie Beams CeedInt u_elstride = eldofssize; 1397f5b9731SStan Tomov CeedInt v_elstride = elquadsize; 1407f5b9731SStan Tomov // Component strides 141868539c2SNatalie Beams CeedInt u_compstride = nelem * eldofssize; 1427f5b9731SStan Tomov CeedInt v_compstride = nelem * elquadsize; 1437f5b9731SStan Tomov // Dimension strides 1447f5b9731SStan Tomov CeedInt u_dimstride = 0; 1457f5b9731SStan Tomov CeedInt v_dimstride = nelem * elquadsize * ncomp; 1467f5b9731SStan Tomov 1477f5b9731SStan Tomov // --- Swap strides for TRANSPOSE mode: --- 1487f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1497f5b9731SStan Tomov // Input (u) is Q-vector, output (v) is E-vector 1507f5b9731SStan Tomov // Element strides 151868539c2SNatalie Beams v_elstride = eldofssize; 1527f5b9731SStan Tomov u_elstride = elquadsize; 1537f5b9731SStan Tomov // Component strides 154868539c2SNatalie Beams v_compstride = nelem * eldofssize; 1557f5b9731SStan Tomov u_compstride = nelem * elquadsize; 1567f5b9731SStan Tomov // Dimension strides 1577f5b9731SStan Tomov v_dimstride = 0; 1587f5b9731SStan Tomov u_dimstride = nelem * elquadsize * ncomp; 1597f5b9731SStan Tomov 
1607f5b9731SStan Tomov } 1617f5b9731SStan Tomov 162e0582403Sabdelfattah83 ierr = magma_grad( P, Q, dim, ncomp, 1637f5b9731SStan Tomov impl->dinterp1d, impl->dgrad1d, tmode, 164e0582403Sabdelfattah83 u, u_elstride, u_compstride, u_dimstride, 165e0582403Sabdelfattah83 v, v_elstride, v_compstride, v_dimstride, 166e0582403Sabdelfattah83 nelem, data->basis_kernel_mode, data->maxthreads, 167e0582403Sabdelfattah83 data->queue); 168*e15f9bd0SJeremy L Thompson if (ierr != 0) CeedError(ceed, CEED_ERROR_BACKEND, 169e0582403Sabdelfattah83 "MAGMA: launch failure detected for magma_grad"); 1707f5b9731SStan Tomov } 1713513a710Sjeremylt break; 1723513a710Sjeremylt case CEED_EVAL_WEIGHT: { 1737f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) 1747f5b9731SStan Tomov // LCOV_EXCL_START 175*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 1767f5b9731SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 1777f5b9731SStan Tomov // LCOV_EXCL_STOP 1787f5b9731SStan Tomov CeedInt Q = Q1d; 1797f5b9731SStan Tomov int eldofssize = CeedIntPow(Q, dim); 180e0582403Sabdelfattah83 ierr = magma_weight(Q, dim, impl->dqweight1d, v, eldofssize, nelem, 181e0582403Sabdelfattah83 data->basis_kernel_mode, data->maxthreads, data->queue); 182*e15f9bd0SJeremy L Thompson if (ierr != 0) CeedError(ceed, CEED_ERROR_BACKEND, 183e0582403Sabdelfattah83 "MAGMA: launch failure detected for magma_weight"); 1847f5b9731SStan Tomov } 1853513a710Sjeremylt break; 1863513a710Sjeremylt // LCOV_EXCL_START 1873513a710Sjeremylt case CEED_EVAL_DIV: 188*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 1893513a710Sjeremylt case CEED_EVAL_CURL: 190*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 1913513a710Sjeremylt case CEED_EVAL_NONE: 192*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 1933513a710Sjeremylt "CEED_EVAL_NONE does not make sense in this context"); 
1943513a710Sjeremylt // LCOV_EXCL_STOP 1953513a710Sjeremylt } 1967f5b9731SStan Tomov 197e0582403Sabdelfattah83 // must sync to ensure completeness 198e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 199e0582403Sabdelfattah83 2007f5b9731SStan Tomov if (emode!=CEED_EVAL_WEIGHT) { 201*e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(U, &u); CeedChkBackend(ierr); 2027f5b9731SStan Tomov } 203*e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArray(V, &v); CeedChkBackend(ierr); 204*e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2057f5b9731SStan Tomov } 2067f5b9731SStan Tomov 2077f5b9731SStan Tomov #ifdef __cplusplus 2087f5b9731SStan Tomov CEED_INTERN "C" 2097f5b9731SStan Tomov #endif 210868539c2SNatalie Beams int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt nelem, 211868539c2SNatalie Beams CeedTransposeMode tmode, CeedEvalMode emode, 212868539c2SNatalie Beams CeedVector U, CeedVector V) { 213868539c2SNatalie Beams int ierr; 214868539c2SNatalie Beams Ceed ceed; 215*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 216e0582403Sabdelfattah83 217e0582403Sabdelfattah83 Ceed_Magma *data; 218*e15f9bd0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 219e0582403Sabdelfattah83 220868539c2SNatalie Beams CeedInt dim, ncomp, ndof, nqpt; 221*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 222*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 223*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr); 224*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChkBackend(ierr); 225868539c2SNatalie Beams const CeedScalar *du; 226868539c2SNatalie Beams CeedScalar *dv; 227868539c2SNatalie Beams if (emode != CEED_EVAL_WEIGHT) { 228*e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); 
CeedChkBackend(ierr); 229868539c2SNatalie Beams } else if (emode != CEED_EVAL_WEIGHT) { 230868539c2SNatalie Beams // LCOV_EXCL_START 231*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 232868539c2SNatalie Beams "An input vector is required for this CeedEvalMode"); 233868539c2SNatalie Beams // LCOV_EXCL_STOP 234868539c2SNatalie Beams } 235*e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); 236868539c2SNatalie Beams 237868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 238*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 239868539c2SNatalie Beams 240868539c2SNatalie Beams CeedDebug("\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d", 241868539c2SNatalie Beams ncomp*ndof, ncomp); 242868539c2SNatalie Beams 243868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) { 244868539c2SNatalie Beams CeedInt length; 245868539c2SNatalie Beams ierr = CeedVectorGetLength(V, &length); 246e0582403Sabdelfattah83 magmablas_dlaset(MagmaFull, length, 1, 0., 0., dv, length, data->queue); 247e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 248868539c2SNatalie Beams } 249868539c2SNatalie Beams switch (emode) { 250868539c2SNatalie Beams case CEED_EVAL_INTERP: { 251868539c2SNatalie Beams CeedInt P = ndof, Q = nqpt; 252868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) 253e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 254868539c2SNatalie Beams P, nelem*ncomp, Q, 255868539c2SNatalie Beams 1.0, impl->dinterp, P, 256868539c2SNatalie Beams du, Q, 257e0582403Sabdelfattah83 0.0, dv, P, data->queue); 258868539c2SNatalie Beams else 259e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans, 260868539c2SNatalie Beams Q, nelem*ncomp, P, 261868539c2SNatalie Beams 1.0, impl->dinterp, P, 262868539c2SNatalie Beams du, P, 263e0582403Sabdelfattah83 0.0, dv, Q, data->queue); 264868539c2SNatalie Beams } 265868539c2SNatalie Beams break; 
266868539c2SNatalie Beams 267868539c2SNatalie Beams case CEED_EVAL_GRAD: { 268868539c2SNatalie Beams CeedInt P = ndof, Q = nqpt; 269868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) { 270868539c2SNatalie Beams double beta = 0.0; 271868539c2SNatalie Beams for(int d=0; d<dim; d++) { 272868539c2SNatalie Beams if (d>0) 273868539c2SNatalie Beams beta = 1.0; 274e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 275868539c2SNatalie Beams P, nelem*ncomp, Q, 276868539c2SNatalie Beams 1.0, impl->dgrad + d*P*Q, P, 277868539c2SNatalie Beams du + d*nelem*ncomp*Q, Q, 278e0582403Sabdelfattah83 beta, dv, P, data->queue); 279868539c2SNatalie Beams } 280868539c2SNatalie Beams } else { 281868539c2SNatalie Beams for(int d=0; d< dim; d++) 282e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans, 283868539c2SNatalie Beams Q, nelem*ncomp, P, 284868539c2SNatalie Beams 1.0, impl->dgrad + d*P*Q, P, 285868539c2SNatalie Beams du, P, 286e0582403Sabdelfattah83 0.0, dv + d*nelem*ncomp*Q, Q, data->queue); 287868539c2SNatalie Beams } 288868539c2SNatalie Beams } 289868539c2SNatalie Beams break; 290868539c2SNatalie Beams 291868539c2SNatalie Beams case CEED_EVAL_WEIGHT: { 292868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) 293868539c2SNatalie Beams // LCOV_EXCL_START 294*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 295868539c2SNatalie Beams "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 296868539c2SNatalie Beams // LCOV_EXCL_STOP 297868539c2SNatalie Beams 298868539c2SNatalie Beams int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1; 299868539c2SNatalie Beams int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)? 
300868539c2SNatalie Beams 1 : 0 ); 301e0582403Sabdelfattah83 magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv, 302e0582403Sabdelfattah83 data->queue); 303*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 304868539c2SNatalie Beams } 305868539c2SNatalie Beams break; 306868539c2SNatalie Beams 307868539c2SNatalie Beams // LCOV_EXCL_START 308868539c2SNatalie Beams case CEED_EVAL_DIV: 309*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 310868539c2SNatalie Beams case CEED_EVAL_CURL: 311*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 312868539c2SNatalie Beams case CEED_EVAL_NONE: 313*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 314868539c2SNatalie Beams "CEED_EVAL_NONE does not make sense in this context"); 315868539c2SNatalie Beams // LCOV_EXCL_STOP 316868539c2SNatalie Beams } 317868539c2SNatalie Beams 318e0582403Sabdelfattah83 // must sync to ensure completeness 319e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 320e0582403Sabdelfattah83 321868539c2SNatalie Beams if (emode!=CEED_EVAL_WEIGHT) { 322*e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(U, &du); CeedChkBackend(ierr); 323868539c2SNatalie Beams } 324*e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArray(V, &dv); CeedChkBackend(ierr); 325*e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 326868539c2SNatalie Beams } 327868539c2SNatalie Beams 328868539c2SNatalie Beams #ifdef __cplusplus 329868539c2SNatalie Beams CEED_INTERN "C" 330868539c2SNatalie Beams #endif 3313513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) { 3327f5b9731SStan Tomov int ierr; 3337f5b9731SStan Tomov CeedBasis_Magma *impl; 334*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 3357f5b9731SStan Tomov 336*e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dqref1d); CeedChkBackend(ierr); 337*e15f9bd0SJeremy L Thompson 
ierr = magma_free(impl->dinterp1d); CeedChkBackend(ierr); 338*e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dgrad1d); CeedChkBackend(ierr); 339*e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dqweight1d); CeedChkBackend(ierr); 3407f5b9731SStan Tomov 341*e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 3427f5b9731SStan Tomov 343*e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3447f5b9731SStan Tomov } 3457f5b9731SStan Tomov 3467f5b9731SStan Tomov #ifdef __cplusplus 3477f5b9731SStan Tomov CEED_INTERN "C" 3487f5b9731SStan Tomov #endif 349868539c2SNatalie Beams int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) { 350868539c2SNatalie Beams int ierr; 351868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 352*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 353868539c2SNatalie Beams 354*e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dqref); CeedChkBackend(ierr); 355*e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dinterp); CeedChkBackend(ierr); 356*e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dgrad); CeedChkBackend(ierr); 357*e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dqweight); CeedChkBackend(ierr); 358868539c2SNatalie Beams 359*e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 360868539c2SNatalie Beams 361*e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 362868539c2SNatalie Beams } 363868539c2SNatalie Beams 364868539c2SNatalie Beams #ifdef __cplusplus 365868539c2SNatalie Beams CEED_INTERN "C" 366868539c2SNatalie Beams #endif 3673513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, 3683513a710Sjeremylt const CeedScalar *interp1d, 3697f5b9731SStan Tomov const CeedScalar *grad1d, 3707f5b9731SStan Tomov const CeedScalar *qref1d, 3713513a710Sjeremylt const CeedScalar *qweight1d, CeedBasis basis) { 3727f5b9731SStan Tomov int ierr; 3737f5b9731SStan Tomov CeedBasis_Magma *impl; 3747f5b9731SStan Tomov Ceed ceed; 
375*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 3767f5b9731SStan Tomov 377c9f8acf2SJeremy L Thompson // Check for supported parameters 378c9f8acf2SJeremy L Thompson CeedInt ncomp = 0; 379*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 380c9f8acf2SJeremy L Thompson if (ncomp > 3) 381c9f8acf2SJeremy L Thompson // LCOV_EXCL_START 382*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 383c9f8acf2SJeremy L Thompson "Magma backend does not support tensor bases with more than 3 components"); 384c9f8acf2SJeremy L Thompson // LCOV_EXCL_STOP 385c9f8acf2SJeremy L Thompson if (P1d > 10) 386c9f8acf2SJeremy L Thompson // LCOV_EXCL_START 387*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 388c9f8acf2SJeremy L Thompson "Magma backend does not support tensor bases with more than 10 nodes in each dimension"); 389c9f8acf2SJeremy L Thompson // LCOV_EXCL_STOP 390c9f8acf2SJeremy L Thompson if (Q1d > 10) 391c9f8acf2SJeremy L Thompson // LCOV_EXCL_START 392*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 393c9f8acf2SJeremy L Thompson "Magma backend does not support tensor bases with more than 10 quadrature points in each dimension"); 394c9f8acf2SJeremy L Thompson // LCOV_EXCL_STOP 395c9f8acf2SJeremy L Thompson 396e0582403Sabdelfattah83 Ceed_Magma *data; 397*e15f9bd0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 398e0582403Sabdelfattah83 3997f5b9731SStan Tomov ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 400*e15f9bd0SJeremy L Thompson CeedBasisApply_Magma); CeedChkBackend(ierr); 4017f5b9731SStan Tomov ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 402*e15f9bd0SJeremy L Thompson CeedBasisDestroy_Magma); CeedChkBackend(ierr); 4037f5b9731SStan Tomov 404*e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr); 405*e15f9bd0SJeremy L Thompson ierr = 
CeedBasisSetData(basis, impl); CeedChkBackend(ierr); 4067f5b9731SStan Tomov 4077f5b9731SStan Tomov // Copy qref1d to the GPU 4087f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0])); 409*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 410e0582403Sabdelfattah83 magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1, 411e0582403Sabdelfattah83 data->queue); 4127f5b9731SStan Tomov 4137f5b9731SStan Tomov // Copy interp1d to the GPU 4147f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0])); 415*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 416e0582403Sabdelfattah83 magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1, 417e0582403Sabdelfattah83 data->queue); 4187f5b9731SStan Tomov 4197f5b9731SStan Tomov // Copy grad1d to the GPU 4207f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0])); 421*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 422e0582403Sabdelfattah83 magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1, 423e0582403Sabdelfattah83 data->queue); 4247f5b9731SStan Tomov 4257f5b9731SStan Tomov // Copy qweight1d to the GPU 4267f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0])); 427*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 428e0582403Sabdelfattah83 magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1, 429e0582403Sabdelfattah83 data->queue); 4307f5b9731SStan Tomov 431*e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4327f5b9731SStan Tomov } 4337f5b9731SStan Tomov 4347f5b9731SStan Tomov #ifdef __cplusplus 4357f5b9731SStan Tomov CEED_INTERN "C" 4367f5b9731SStan Tomov #endif 4373513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof, 4383513a710Sjeremylt CeedInt nqpts, const CeedScalar *interp, 4393513a710Sjeremylt const CeedScalar *grad, const CeedScalar *qref, 4403513a710Sjeremylt 
const CeedScalar *qweight, CeedBasis basis) { 4417f5b9731SStan Tomov int ierr; 442868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 4437f5b9731SStan Tomov Ceed ceed; 444*e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 4457f5b9731SStan Tomov 446e0582403Sabdelfattah83 Ceed_Magma *data; 447*e15f9bd0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 448e0582403Sabdelfattah83 449868539c2SNatalie Beams ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 450*e15f9bd0SJeremy L Thompson CeedBasisApplyNonTensor_Magma); CeedChkBackend(ierr); 451868539c2SNatalie Beams ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 452*e15f9bd0SJeremy L Thompson CeedBasisDestroyNonTensor_Magma); CeedChkBackend(ierr); 453868539c2SNatalie Beams 454*e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr); 455*e15f9bd0SJeremy L Thompson ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr); 456868539c2SNatalie Beams 457868539c2SNatalie Beams // Copy qref to the GPU 458868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dqref, nqpts*sizeof(qref[0])); 459*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 460e0582403Sabdelfattah83 magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1, data->queue); 461868539c2SNatalie Beams 462868539c2SNatalie Beams // Copy interp to the GPU 463868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0])); 464*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 465e0582403Sabdelfattah83 magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1, 466e0582403Sabdelfattah83 data->queue); 467868539c2SNatalie Beams 468868539c2SNatalie Beams // Copy grad to the GPU 469868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0])); 470*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 471e0582403Sabdelfattah83 magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), 
grad, 1, impl->dgrad, 1, 472e0582403Sabdelfattah83 data->queue); 473868539c2SNatalie Beams 474868539c2SNatalie Beams // Copy qweight to the GPU 475868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0])); 476*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 477e0582403Sabdelfattah83 magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1, 478e0582403Sabdelfattah83 data->queue); 479868539c2SNatalie Beams 480*e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4817f5b9731SStan Tomov } 482