// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
167f5b9731SStan Tomov 177f5b9731SStan Tomov #include "ceed-magma.h" 187f5b9731SStan Tomov 197f5b9731SStan Tomov #ifdef __cplusplus 207f5b9731SStan Tomov CEED_INTERN "C" 217f5b9731SStan Tomov #endif 227f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, 237f5b9731SStan Tomov CeedTransposeMode tmode, CeedEvalMode emode, 243513a710Sjeremylt CeedVector U, CeedVector V) { 257f5b9731SStan Tomov int ierr; 267f5b9731SStan Tomov Ceed ceed; 277f5b9731SStan Tomov ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 28e0582403Sabdelfattah83 CeedInt dim, ncomp, ndof; 297f5b9731SStan Tomov ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 307f5b9731SStan Tomov ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr); 317f5b9731SStan Tomov ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr); 32e0582403Sabdelfattah83 33e0582403Sabdelfattah83 Ceed_Magma *data; 34*777ff853SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChk(ierr); 35e0582403Sabdelfattah83 367f5b9731SStan Tomov const CeedScalar *u; 377f5b9731SStan Tomov CeedScalar *v; 38868539c2SNatalie Beams if (emode != CEED_EVAL_WEIGHT) { 397f5b9731SStan Tomov ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChk(ierr); 407f5b9731SStan Tomov } else if (emode != CEED_EVAL_WEIGHT) { 417f5b9731SStan Tomov // LCOV_EXCL_START 427f5b9731SStan Tomov return CeedError(ceed, 1, 437f5b9731SStan Tomov "An input vector is required for this CeedEvalMode"); 447f5b9731SStan Tomov // LCOV_EXCL_STOP 457f5b9731SStan Tomov } 467f5b9731SStan Tomov ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChk(ierr); 477f5b9731SStan Tomov 487f5b9731SStan Tomov CeedBasis_Magma *impl; 49*777ff853SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChk(ierr); 507f5b9731SStan Tomov 517f5b9731SStan Tomov CeedInt P1d, Q1d; 527f5b9731SStan Tomov ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr); 537f5b9731SStan Tomov ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr); 
547f5b9731SStan Tomov 557f5b9731SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d", 567f5b9731SStan Tomov ncomp*CeedIntPow(P1d, dim), ncomp); 577f5b9731SStan Tomov 587f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 597f5b9731SStan Tomov CeedInt length; 60465fc175SJeremy L Thompson ierr = CeedVectorGetLength(V, &length); CeedChk(ierr); 61e0582403Sabdelfattah83 magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length, data->queue); 62e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 637f5b9731SStan Tomov } 643513a710Sjeremylt switch (emode) { 653513a710Sjeremylt case CEED_EVAL_INTERP: { 667f5b9731SStan Tomov CeedInt P = P1d, Q = Q1d; 677f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 687f5b9731SStan Tomov P = Q1d; Q = P1d; 697f5b9731SStan Tomov } 707f5b9731SStan Tomov 717f5b9731SStan Tomov // Define element sizes for dofs/quad 727f5b9731SStan Tomov CeedInt elquadsize = CeedIntPow(Q1d, dim); 737f5b9731SStan Tomov CeedInt eldofssize = CeedIntPow(P1d, dim); 747f5b9731SStan Tomov 757f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 76868539c2SNatalie Beams // component component 77868539c2SNatalie Beams // elem elem 787f5b9731SStan Tomov // node node 797f5b9731SStan Tomov 807f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 817f5b9731SStan Tomov // Input (u) is E-vector, output (v) is Q-vector 827f5b9731SStan Tomov 837f5b9731SStan Tomov // Element strides 84868539c2SNatalie Beams CeedInt u_elstride = eldofssize; 857f5b9731SStan Tomov CeedInt v_elstride = elquadsize; 867f5b9731SStan Tomov // Component strides 87868539c2SNatalie Beams CeedInt u_compstride = nelem * eldofssize; 887f5b9731SStan Tomov CeedInt v_compstride = nelem * elquadsize; 897f5b9731SStan Tomov 907f5b9731SStan Tomov // --- Swap strides for TRANSPOSE mode: --- 917f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 927f5b9731SStan Tomov // Input (u) is Q-vector, output (v) is E-vector 937f5b9731SStan Tomov // Element strides 
94868539c2SNatalie Beams v_elstride = eldofssize; 957f5b9731SStan Tomov u_elstride = elquadsize; 967f5b9731SStan Tomov // Component strides 97868539c2SNatalie Beams v_compstride = nelem * eldofssize; 987f5b9731SStan Tomov u_compstride = nelem * elquadsize; 997f5b9731SStan Tomov } 1007f5b9731SStan Tomov 101e0582403Sabdelfattah83 ierr = magma_interp(P, Q, dim, ncomp, 1027f5b9731SStan Tomov impl->dinterp1d, tmode, 103868539c2SNatalie Beams u, u_elstride, u_compstride, 104868539c2SNatalie Beams v, v_elstride, v_compstride, 105e0582403Sabdelfattah83 nelem, data->basis_kernel_mode, data->maxthreads, 106e0582403Sabdelfattah83 data->queue); 107e0582403Sabdelfattah83 if (ierr != 0) CeedError(ceed, 1, 108e0582403Sabdelfattah83 "MAGMA: launch failure detected for magma_interp"); 1097f5b9731SStan Tomov } 1103513a710Sjeremylt break; 1113513a710Sjeremylt case CEED_EVAL_GRAD: { 1127f5b9731SStan Tomov CeedInt P = P1d, Q = Q1d; 1137f5b9731SStan Tomov // In CEED_NOTRANSPOSE mode: 1147f5b9731SStan Tomov // u is (P^dim x nc), column-major layout (nc = ncomp) 1157f5b9731SStan Tomov // v is (Q^dim x nc x dim), column-major layout (nc = ncomp) 1167f5b9731SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 
1177f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1187f5b9731SStan Tomov P = Q1d, Q = P1d; 1197f5b9731SStan Tomov } 1207f5b9731SStan Tomov 1217f5b9731SStan Tomov // Define element sizes for dofs/quad 1227f5b9731SStan Tomov CeedInt elquadsize = CeedIntPow(Q1d, dim); 1237f5b9731SStan Tomov CeedInt eldofssize = CeedIntPow(P1d, dim); 1247f5b9731SStan Tomov 1257f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 1267f5b9731SStan Tomov // dim 127868539c2SNatalie Beams // component component 128868539c2SNatalie Beams // elem elem 1297f5b9731SStan Tomov // node node 1307f5b9731SStan Tomov 1317f5b9731SStan Tomov 1327f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 1337f5b9731SStan Tomov // Input (u) is E-vector, output (v) is Q-vector 1347f5b9731SStan Tomov 1357f5b9731SStan Tomov // Element strides 136868539c2SNatalie Beams CeedInt u_elstride = eldofssize; 1377f5b9731SStan Tomov CeedInt v_elstride = elquadsize; 1387f5b9731SStan Tomov // Component strides 139868539c2SNatalie Beams CeedInt u_compstride = nelem * eldofssize; 1407f5b9731SStan Tomov CeedInt v_compstride = nelem * elquadsize; 1417f5b9731SStan Tomov // Dimension strides 1427f5b9731SStan Tomov CeedInt u_dimstride = 0; 1437f5b9731SStan Tomov CeedInt v_dimstride = nelem * elquadsize * ncomp; 1447f5b9731SStan Tomov 1457f5b9731SStan Tomov // --- Swap strides for TRANSPOSE mode: --- 1467f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1477f5b9731SStan Tomov // Input (u) is Q-vector, output (v) is E-vector 1487f5b9731SStan Tomov // Element strides 149868539c2SNatalie Beams v_elstride = eldofssize; 1507f5b9731SStan Tomov u_elstride = elquadsize; 1517f5b9731SStan Tomov // Component strides 152868539c2SNatalie Beams v_compstride = nelem * eldofssize; 1537f5b9731SStan Tomov u_compstride = nelem * elquadsize; 1547f5b9731SStan Tomov // Dimension strides 1557f5b9731SStan Tomov v_dimstride = 0; 1567f5b9731SStan Tomov u_dimstride = nelem * elquadsize * ncomp; 1577f5b9731SStan Tomov 
1587f5b9731SStan Tomov } 1597f5b9731SStan Tomov 160e0582403Sabdelfattah83 ierr = magma_grad( P, Q, dim, ncomp, 1617f5b9731SStan Tomov impl->dinterp1d, impl->dgrad1d, tmode, 162e0582403Sabdelfattah83 u, u_elstride, u_compstride, u_dimstride, 163e0582403Sabdelfattah83 v, v_elstride, v_compstride, v_dimstride, 164e0582403Sabdelfattah83 nelem, data->basis_kernel_mode, data->maxthreads, 165e0582403Sabdelfattah83 data->queue); 166e0582403Sabdelfattah83 if (ierr != 0) CeedError(ceed, 1, 167e0582403Sabdelfattah83 "MAGMA: launch failure detected for magma_grad"); 1687f5b9731SStan Tomov } 1693513a710Sjeremylt break; 1703513a710Sjeremylt case CEED_EVAL_WEIGHT: { 1717f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) 1727f5b9731SStan Tomov // LCOV_EXCL_START 1737f5b9731SStan Tomov return CeedError(ceed, 1, 1747f5b9731SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 1757f5b9731SStan Tomov // LCOV_EXCL_STOP 1767f5b9731SStan Tomov CeedInt Q = Q1d; 1777f5b9731SStan Tomov int eldofssize = CeedIntPow(Q, dim); 178e0582403Sabdelfattah83 ierr = magma_weight(Q, dim, impl->dqweight1d, v, eldofssize, nelem, 179e0582403Sabdelfattah83 data->basis_kernel_mode, data->maxthreads, data->queue); 180e0582403Sabdelfattah83 if (ierr != 0) CeedError(ceed, 1, 181e0582403Sabdelfattah83 "MAGMA: launch failure detected for magma_weight"); 1827f5b9731SStan Tomov } 1833513a710Sjeremylt break; 1843513a710Sjeremylt // LCOV_EXCL_START 1853513a710Sjeremylt case CEED_EVAL_DIV: 1863513a710Sjeremylt return CeedError(ceed, 1, "CEED_EVAL_DIV not supported"); 1873513a710Sjeremylt case CEED_EVAL_CURL: 1883513a710Sjeremylt return CeedError(ceed, 1, "CEED_EVAL_CURL not supported"); 1893513a710Sjeremylt case CEED_EVAL_NONE: 1903513a710Sjeremylt return CeedError(ceed, 1, 1913513a710Sjeremylt "CEED_EVAL_NONE does not make sense in this context"); 1923513a710Sjeremylt // LCOV_EXCL_STOP 1933513a710Sjeremylt } 1947f5b9731SStan Tomov 195e0582403Sabdelfattah83 // must sync to ensure completeness 
196e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 197e0582403Sabdelfattah83 1987f5b9731SStan Tomov if (emode!=CEED_EVAL_WEIGHT) { 1997f5b9731SStan Tomov ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr); 2007f5b9731SStan Tomov } 2017f5b9731SStan Tomov ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr); 2027f5b9731SStan Tomov return 0; 2037f5b9731SStan Tomov } 2047f5b9731SStan Tomov 2057f5b9731SStan Tomov #ifdef __cplusplus 2067f5b9731SStan Tomov CEED_INTERN "C" 2077f5b9731SStan Tomov #endif 208868539c2SNatalie Beams int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt nelem, 209868539c2SNatalie Beams CeedTransposeMode tmode, CeedEvalMode emode, 210868539c2SNatalie Beams CeedVector U, CeedVector V) { 211868539c2SNatalie Beams int ierr; 212868539c2SNatalie Beams Ceed ceed; 213868539c2SNatalie Beams ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 214e0582403Sabdelfattah83 215e0582403Sabdelfattah83 Ceed_Magma *data; 216*777ff853SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChk(ierr); 217e0582403Sabdelfattah83 218868539c2SNatalie Beams CeedInt dim, ncomp, ndof, nqpt; 219868539c2SNatalie Beams ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 220868539c2SNatalie Beams ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr); 221868539c2SNatalie Beams ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr); 222868539c2SNatalie Beams ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr); 223868539c2SNatalie Beams const CeedScalar *du; 224868539c2SNatalie Beams CeedScalar *dv; 225868539c2SNatalie Beams if (emode != CEED_EVAL_WEIGHT) { 226868539c2SNatalie Beams ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChk(ierr); 227868539c2SNatalie Beams } else if (emode != CEED_EVAL_WEIGHT) { 228868539c2SNatalie Beams // LCOV_EXCL_START 229868539c2SNatalie Beams return CeedError(ceed, 1, 230868539c2SNatalie Beams "An input vector is required for this CeedEvalMode"); 231868539c2SNatalie Beams // 
LCOV_EXCL_STOP 232868539c2SNatalie Beams } 233868539c2SNatalie Beams ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChk(ierr); 234868539c2SNatalie Beams 235868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 236*777ff853SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChk(ierr); 237868539c2SNatalie Beams 238868539c2SNatalie Beams CeedDebug("\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d", 239868539c2SNatalie Beams ncomp*ndof, ncomp); 240868539c2SNatalie Beams 241868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) { 242868539c2SNatalie Beams CeedInt length; 243868539c2SNatalie Beams ierr = CeedVectorGetLength(V, &length); 244e0582403Sabdelfattah83 magmablas_dlaset(MagmaFull, length, 1, 0., 0., dv, length, data->queue); 245e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 246868539c2SNatalie Beams } 247868539c2SNatalie Beams switch (emode) { 248868539c2SNatalie Beams case CEED_EVAL_INTERP: { 249868539c2SNatalie Beams CeedInt P = ndof, Q = nqpt; 250868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) 251e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 252868539c2SNatalie Beams P, nelem*ncomp, Q, 253868539c2SNatalie Beams 1.0, impl->dinterp, P, 254868539c2SNatalie Beams du, Q, 255e0582403Sabdelfattah83 0.0, dv, P, data->queue); 256868539c2SNatalie Beams else 257e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans, 258868539c2SNatalie Beams Q, nelem*ncomp, P, 259868539c2SNatalie Beams 1.0, impl->dinterp, P, 260868539c2SNatalie Beams du, P, 261e0582403Sabdelfattah83 0.0, dv, Q, data->queue); 262868539c2SNatalie Beams } 263868539c2SNatalie Beams break; 264868539c2SNatalie Beams 265868539c2SNatalie Beams case CEED_EVAL_GRAD: { 266868539c2SNatalie Beams CeedInt P = ndof, Q = nqpt; 267868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) { 268868539c2SNatalie Beams double beta = 0.0; 269868539c2SNatalie Beams for(int d=0; d<dim; d++) { 270868539c2SNatalie Beams if (d>0) 271868539c2SNatalie 
Beams beta = 1.0; 272e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 273868539c2SNatalie Beams P, nelem*ncomp, Q, 274868539c2SNatalie Beams 1.0, impl->dgrad + d*P*Q, P, 275868539c2SNatalie Beams du + d*nelem*ncomp*Q, Q, 276e0582403Sabdelfattah83 beta, dv, P, data->queue); 277868539c2SNatalie Beams } 278868539c2SNatalie Beams } else { 279868539c2SNatalie Beams for(int d=0; d< dim; d++) 280e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans, 281868539c2SNatalie Beams Q, nelem*ncomp, P, 282868539c2SNatalie Beams 1.0, impl->dgrad + d*P*Q, P, 283868539c2SNatalie Beams du, P, 284e0582403Sabdelfattah83 0.0, dv + d*nelem*ncomp*Q, Q, data->queue); 285868539c2SNatalie Beams } 286868539c2SNatalie Beams } 287868539c2SNatalie Beams break; 288868539c2SNatalie Beams 289868539c2SNatalie Beams case CEED_EVAL_WEIGHT: { 290868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) 291868539c2SNatalie Beams // LCOV_EXCL_START 292868539c2SNatalie Beams return CeedError(ceed, 1, 293868539c2SNatalie Beams "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 294868539c2SNatalie Beams // LCOV_EXCL_STOP 295868539c2SNatalie Beams 296868539c2SNatalie Beams int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1; 297868539c2SNatalie Beams int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)? 
298868539c2SNatalie Beams 1 : 0 ); 299e0582403Sabdelfattah83 magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv, 300e0582403Sabdelfattah83 data->queue); 301868539c2SNatalie Beams CeedChk(ierr); 302868539c2SNatalie Beams } 303868539c2SNatalie Beams break; 304868539c2SNatalie Beams 305868539c2SNatalie Beams // LCOV_EXCL_START 306868539c2SNatalie Beams case CEED_EVAL_DIV: 307868539c2SNatalie Beams return CeedError(ceed, 1, "CEED_EVAL_DIV not supported"); 308868539c2SNatalie Beams case CEED_EVAL_CURL: 309868539c2SNatalie Beams return CeedError(ceed, 1, "CEED_EVAL_CURL not supported"); 310868539c2SNatalie Beams case CEED_EVAL_NONE: 311868539c2SNatalie Beams return CeedError(ceed, 1, 312868539c2SNatalie Beams "CEED_EVAL_NONE does not make sense in this context"); 313868539c2SNatalie Beams // LCOV_EXCL_STOP 314868539c2SNatalie Beams } 315868539c2SNatalie Beams 316e0582403Sabdelfattah83 // must sync to ensure completeness 317e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 318e0582403Sabdelfattah83 319868539c2SNatalie Beams if (emode!=CEED_EVAL_WEIGHT) { 320868539c2SNatalie Beams ierr = CeedVectorRestoreArrayRead(U, &du); CeedChk(ierr); 321868539c2SNatalie Beams } 322868539c2SNatalie Beams ierr = CeedVectorRestoreArray(V, &dv); CeedChk(ierr); 323868539c2SNatalie Beams return 0; 324868539c2SNatalie Beams } 325868539c2SNatalie Beams 326868539c2SNatalie Beams #ifdef __cplusplus 327868539c2SNatalie Beams CEED_INTERN "C" 328868539c2SNatalie Beams #endif 3293513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) { 3307f5b9731SStan Tomov int ierr; 3317f5b9731SStan Tomov CeedBasis_Magma *impl; 332*777ff853SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChk(ierr); 3337f5b9731SStan Tomov 3347f5b9731SStan Tomov ierr = magma_free(impl->dqref1d); CeedChk(ierr); 3357f5b9731SStan Tomov ierr = magma_free(impl->dinterp1d); CeedChk(ierr); 3367f5b9731SStan Tomov ierr = magma_free(impl->dgrad1d); CeedChk(ierr); 3377f5b9731SStan Tomov ierr = 
magma_free(impl->dqweight1d); CeedChk(ierr); 3387f5b9731SStan Tomov 3397f5b9731SStan Tomov ierr = CeedFree(&impl); CeedChk(ierr); 3407f5b9731SStan Tomov 3417f5b9731SStan Tomov return 0; 3427f5b9731SStan Tomov } 3437f5b9731SStan Tomov 3447f5b9731SStan Tomov #ifdef __cplusplus 3457f5b9731SStan Tomov CEED_INTERN "C" 3467f5b9731SStan Tomov #endif 347868539c2SNatalie Beams int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) { 348868539c2SNatalie Beams int ierr; 349868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 350*777ff853SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChk(ierr); 351868539c2SNatalie Beams 352868539c2SNatalie Beams ierr = magma_free(impl->dqref); CeedChk(ierr); 353868539c2SNatalie Beams ierr = magma_free(impl->dinterp); CeedChk(ierr); 354868539c2SNatalie Beams ierr = magma_free(impl->dgrad); CeedChk(ierr); 355868539c2SNatalie Beams ierr = magma_free(impl->dqweight); CeedChk(ierr); 356868539c2SNatalie Beams 357868539c2SNatalie Beams ierr = CeedFree(&impl); CeedChk(ierr); 358868539c2SNatalie Beams 359868539c2SNatalie Beams return 0; 360868539c2SNatalie Beams } 361868539c2SNatalie Beams 362868539c2SNatalie Beams #ifdef __cplusplus 363868539c2SNatalie Beams CEED_INTERN "C" 364868539c2SNatalie Beams #endif 3653513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, 3663513a710Sjeremylt const CeedScalar *interp1d, 3677f5b9731SStan Tomov const CeedScalar *grad1d, 3687f5b9731SStan Tomov const CeedScalar *qref1d, 3693513a710Sjeremylt const CeedScalar *qweight1d, CeedBasis basis) { 3707f5b9731SStan Tomov int ierr; 3717f5b9731SStan Tomov CeedBasis_Magma *impl; 3727f5b9731SStan Tomov Ceed ceed; 3737f5b9731SStan Tomov ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 3747f5b9731SStan Tomov 375e0582403Sabdelfattah83 Ceed_Magma *data; 376*777ff853SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChk(ierr); 377e0582403Sabdelfattah83 3787f5b9731SStan Tomov ierr = CeedSetBackendFunction(ceed, "Basis", 
basis, "Apply", 3797f5b9731SStan Tomov CeedBasisApply_Magma); CeedChk(ierr); 3807f5b9731SStan Tomov ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 3817f5b9731SStan Tomov CeedBasisDestroy_Magma); CeedChk(ierr); 3827f5b9731SStan Tomov 3837f5b9731SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 384*777ff853SJeremy L Thompson ierr = CeedBasisSetData(basis, impl); CeedChk(ierr); 3857f5b9731SStan Tomov 3867f5b9731SStan Tomov // Copy qref1d to the GPU 3877f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0])); 3887f5b9731SStan Tomov CeedChk(ierr); 389e0582403Sabdelfattah83 magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1, 390e0582403Sabdelfattah83 data->queue); 3917f5b9731SStan Tomov 3927f5b9731SStan Tomov // Copy interp1d to the GPU 3937f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0])); 3947f5b9731SStan Tomov CeedChk(ierr); 395e0582403Sabdelfattah83 magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1, 396e0582403Sabdelfattah83 data->queue); 3977f5b9731SStan Tomov 3987f5b9731SStan Tomov // Copy grad1d to the GPU 3997f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0])); 4007f5b9731SStan Tomov CeedChk(ierr); 401e0582403Sabdelfattah83 magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1, 402e0582403Sabdelfattah83 data->queue); 4037f5b9731SStan Tomov 4047f5b9731SStan Tomov // Copy qweight1d to the GPU 4057f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0])); 4067f5b9731SStan Tomov CeedChk(ierr); 407e0582403Sabdelfattah83 magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1, 408e0582403Sabdelfattah83 data->queue); 4097f5b9731SStan Tomov 4107f5b9731SStan Tomov return 0; 4117f5b9731SStan Tomov } 4127f5b9731SStan Tomov 4137f5b9731SStan Tomov #ifdef __cplusplus 4147f5b9731SStan Tomov CEED_INTERN "C" 4157f5b9731SStan 
Tomov #endif 4163513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof, 4173513a710Sjeremylt CeedInt nqpts, const CeedScalar *interp, 4183513a710Sjeremylt const CeedScalar *grad, const CeedScalar *qref, 4193513a710Sjeremylt const CeedScalar *qweight, CeedBasis basis) { 4207f5b9731SStan Tomov int ierr; 421868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 4227f5b9731SStan Tomov Ceed ceed; 4237f5b9731SStan Tomov ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 4247f5b9731SStan Tomov 425e0582403Sabdelfattah83 Ceed_Magma *data; 426*777ff853SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChk(ierr); 427e0582403Sabdelfattah83 428868539c2SNatalie Beams ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 429868539c2SNatalie Beams CeedBasisApplyNonTensor_Magma); CeedChk(ierr); 430868539c2SNatalie Beams ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 431868539c2SNatalie Beams CeedBasisDestroyNonTensor_Magma); CeedChk(ierr); 432868539c2SNatalie Beams 433868539c2SNatalie Beams ierr = CeedCalloc(1,&impl); CeedChk(ierr); 434*777ff853SJeremy L Thompson ierr = CeedBasisSetData(basis, impl); CeedChk(ierr); 435868539c2SNatalie Beams 436868539c2SNatalie Beams // Copy qref to the GPU 437868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dqref, nqpts*sizeof(qref[0])); 438868539c2SNatalie Beams CeedChk(ierr); 439e0582403Sabdelfattah83 magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1, data->queue); 440868539c2SNatalie Beams 441868539c2SNatalie Beams // Copy interp to the GPU 442868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0])); 443868539c2SNatalie Beams CeedChk(ierr); 444e0582403Sabdelfattah83 magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1, 445e0582403Sabdelfattah83 data->queue); 446868539c2SNatalie Beams 447868539c2SNatalie Beams // Copy grad to the GPU 448868539c2SNatalie Beams ierr = magma_malloc((void 
**)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0])); 449868539c2SNatalie Beams CeedChk(ierr); 450e0582403Sabdelfattah83 magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), grad, 1, impl->dgrad, 1, 451e0582403Sabdelfattah83 data->queue); 452868539c2SNatalie Beams 453868539c2SNatalie Beams // Copy qweight to the GPU 454868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0])); 455868539c2SNatalie Beams CeedChk(ierr); 456e0582403Sabdelfattah83 magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1, 457e0582403Sabdelfattah83 data->queue); 458868539c2SNatalie Beams 459868539c2SNatalie Beams return 0; 4607f5b9731SStan Tomov } 461