1*82b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2*82b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3*82b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details. 4*82b77998SStan Tomov // 5*82b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software 6*82b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral 7*82b77998SStan Tomov // element discretizations for exascale applications. For more information and 8*82b77998SStan Tomov // source code availability see http://github.com/ceed. 9*82b77998SStan Tomov // 10*82b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11*82b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office 12*82b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for 13*82b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including 14*82b77998SStan Tomov // software, applications, hardware, advanced system engineering and early 15*82b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative. 16*82b77998SStan Tomov 17*82b77998SStan Tomov #include <ceed-impl.h> 18*82b77998SStan Tomov #include <string.h> 19*82b77998SStan Tomov 20*82b77998SStan Tomov typedef struct { 21*82b77998SStan Tomov CeedScalar *array; 22*82b77998SStan Tomov CeedScalar *array_allocated; 23*82b77998SStan Tomov } CeedVector_Magma; 24*82b77998SStan Tomov 25*82b77998SStan Tomov typedef struct { 26*82b77998SStan Tomov const CeedInt *indices; 27*82b77998SStan Tomov CeedInt *indices_allocated; 28*82b77998SStan Tomov } CeedElemRestriction_Magma; 29*82b77998SStan Tomov 30*82b77998SStan Tomov typedef struct { 31*82b77998SStan Tomov CeedVector etmp; 32*82b77998SStan Tomov CeedVector qdata; 33*82b77998SStan Tomov } CeedOperator_Magma; 34*82b77998SStan Tomov 35*82b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 36*82b77998SStan Tomov CeedCopyMode cmode, CeedScalar *array) { 37*82b77998SStan Tomov CeedVector_Magma *impl = vec->data; 38*82b77998SStan Tomov int ierr; 39*82b77998SStan Tomov 40*82b77998SStan Tomov if (mtype != CEED_MEM_HOST) 41*82b77998SStan Tomov return CeedError(vec->ceed, 1, "Only MemType = HOST supported"); 42*82b77998SStan Tomov ierr = CeedFree(&impl->array_allocated); CeedChk(ierr); 43*82b77998SStan Tomov switch (cmode) { 44*82b77998SStan Tomov case CEED_COPY_VALUES: 45*82b77998SStan Tomov ierr = CeedMalloc(vec->length, &impl->array_allocated); CeedChk(ierr); 46*82b77998SStan Tomov impl->array = impl->array_allocated; 47*82b77998SStan Tomov if (array) memcpy(impl->array, array, vec->length * sizeof(array[0])); 48*82b77998SStan Tomov break; 49*82b77998SStan Tomov case CEED_OWN_POINTER: 50*82b77998SStan Tomov impl->array_allocated = array; 51*82b77998SStan Tomov impl->array = array; 52*82b77998SStan Tomov break; 53*82b77998SStan Tomov case CEED_USE_POINTER: 54*82b77998SStan Tomov impl->array = array; 55*82b77998SStan Tomov } 56*82b77998SStan Tomov return 0; 57*82b77998SStan Tomov } 58*82b77998SStan Tomov 59*82b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 60*82b77998SStan Tomov CeedScalar **array) { 61*82b77998SStan Tomov CeedVector_Magma *impl = vec->data; 62*82b77998SStan Tomov int ierr; 63*82b77998SStan Tomov 64*82b77998SStan Tomov if (mtype != CEED_MEM_HOST) 65*82b77998SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST memory"); 66*82b77998SStan Tomov if (!impl->array) { // Allocate if array is not yet allocated 67*82b77998SStan Tomov ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL); 68*82b77998SStan Tomov CeedChk(ierr); 69*82b77998SStan Tomov } 70*82b77998SStan Tomov *array = impl->array; 71*82b77998SStan Tomov return 0; 72*82b77998SStan Tomov } 73*82b77998SStan Tomov 74*82b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 75*82b77998SStan Tomov const CeedScalar **array) { 76*82b77998SStan Tomov CeedVector_Magma *impl = vec->data; 77*82b77998SStan Tomov int ierr; 78*82b77998SStan Tomov 79*82b77998SStan Tomov if (mtype != CEED_MEM_HOST) 80*82b77998SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST memory"); 81*82b77998SStan Tomov if (!impl->array) { // Allocate if array is not yet allocated 82*82b77998SStan Tomov ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL); 83*82b77998SStan Tomov CeedChk(ierr); 84*82b77998SStan Tomov } 85*82b77998SStan Tomov *array = impl->array; 86*82b77998SStan Tomov return 0; 87*82b77998SStan Tomov } 88*82b77998SStan Tomov 89*82b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 90*82b77998SStan Tomov *array = NULL; 91*82b77998SStan Tomov return 0; 92*82b77998SStan Tomov } 93*82b77998SStan Tomov 94*82b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 95*82b77998SStan Tomov const CeedScalar **array) { 96*82b77998SStan Tomov *array = NULL; 97*82b77998SStan Tomov return 0; 98*82b77998SStan Tomov } 99*82b77998SStan Tomov 100*82b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) { 101*82b77998SStan Tomov CeedVector_Magma *impl = vec->data; 102*82b77998SStan Tomov int ierr; 103*82b77998SStan Tomov 104*82b77998SStan Tomov ierr = CeedFree(&impl->array_allocated); CeedChk(ierr); 105*82b77998SStan Tomov ierr = CeedFree(&vec->data); CeedChk(ierr); 106*82b77998SStan Tomov return 0; 107*82b77998SStan Tomov } 108*82b77998SStan Tomov 109*82b77998SStan Tomov static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) { 110*82b77998SStan Tomov CeedVector_Magma *impl; 111*82b77998SStan Tomov int ierr; 112*82b77998SStan Tomov 113*82b77998SStan Tomov vec->SetArray = CeedVectorSetArray_Magma; 114*82b77998SStan Tomov vec->GetArray = CeedVectorGetArray_Magma; 115*82b77998SStan Tomov vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 116*82b77998SStan Tomov vec->RestoreArray = CeedVectorRestoreArray_Magma; 117*82b77998SStan Tomov vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 118*82b77998SStan Tomov vec->Destroy = CeedVectorDestroy_Magma; 119*82b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 120*82b77998SStan Tomov vec->data = impl; 121*82b77998SStan Tomov return 0; 122*82b77998SStan Tomov } 123*82b77998SStan Tomov 124*82b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 125*82b77998SStan Tomov CeedTransposeMode tmode, CeedInt ncomp, 126*82b77998SStan Tomov CeedTransposeMode lmode, CeedVector u, 127*82b77998SStan Tomov CeedVector v, CeedRequest *request) { 128*82b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 129*82b77998SStan Tomov int ierr; 130*82b77998SStan Tomov const CeedScalar *uu; 131*82b77998SStan Tomov CeedScalar *vv; 132*82b77998SStan Tomov CeedInt esize = r->nelem*r->elemsize; 133*82b77998SStan Tomov 134*82b77998SStan Tomov ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 135*82b77998SStan Tomov ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 136*82b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 137*82b77998SStan Tomov // Perform: v = r * u 138*82b77998SStan Tomov if (ncomp == 1) { 139*82b77998SStan Tomov for (CeedInt i=0; i<esize; i++) vv[i] = uu[impl->indices[i]]; 140*82b77998SStan Tomov } else { 141*82b77998SStan Tomov // vv is (elemsize x ncomp x nelem), column-major 142*82b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 143*82b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 144*82b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 145*82b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 146*82b77998SStan Tomov vv[i+r->elemsize*(d+ncomp*e)] = 147*82b77998SStan Tomov uu[impl->indices[i+r->elemsize*e]+r->ndof*d]; 148*82b77998SStan Tomov } 149*82b77998SStan Tomov } else { // u is (ncomp x ndof), column-major 150*82b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 151*82b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 152*82b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 153*82b77998SStan Tomov vv[i+r->elemsize*(d+ncomp*e)] = 154*82b77998SStan Tomov uu[d+ncomp*impl->indices[i+r->elemsize*e]]; 155*82b77998SStan Tomov } 156*82b77998SStan Tomov } 157*82b77998SStan Tomov } 158*82b77998SStan Tomov } else { 159*82b77998SStan Tomov // Note: in transpose mode, we perform: v += r^t * u 160*82b77998SStan Tomov if (ncomp == 1) { 161*82b77998SStan Tomov for (CeedInt i=0; i<esize; i++) vv[impl->indices[i]] += uu[i]; 162*82b77998SStan Tomov } else { 163*82b77998SStan Tomov // u is (elemsize x ncomp x nelem) 164*82b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 165*82b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 166*82b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 167*82b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 168*82b77998SStan Tomov vv[impl->indices[i+r->elemsize*e]+r->ndof*d] += 169*82b77998SStan Tomov uu[i+r->elemsize*(d+e*ncomp)]; 170*82b77998SStan Tomov } 171*82b77998SStan Tomov } else { // vv is (ncomp x ndof), column-major 172*82b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 173*82b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 174*82b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 175*82b77998SStan Tomov vv[d+ncomp*impl->indices[i+r->elemsize*e]] += 176*82b77998SStan Tomov uu[i+r->elemsize*(d+e*ncomp)]; 177*82b77998SStan Tomov } 178*82b77998SStan Tomov } 179*82b77998SStan Tomov } 180*82b77998SStan Tomov } 181*82b77998SStan Tomov ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 182*82b77998SStan Tomov ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 183*82b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 184*82b77998SStan Tomov *request = NULL; 185*82b77998SStan Tomov return 0; 186*82b77998SStan Tomov } 187*82b77998SStan Tomov 188*82b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 189*82b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 190*82b77998SStan Tomov int ierr; 191*82b77998SStan Tomov 192*82b77998SStan Tomov ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 193*82b77998SStan Tomov ierr = CeedFree(&r->data); CeedChk(ierr); 194*82b77998SStan Tomov return 0; 195*82b77998SStan Tomov } 196*82b77998SStan Tomov 197*82b77998SStan Tomov static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r, 198*82b77998SStan Tomov CeedMemType mtype, 199*82b77998SStan Tomov CeedCopyMode cmode, const CeedInt *indices) { 200*82b77998SStan Tomov int ierr; 201*82b77998SStan Tomov CeedElemRestriction_Magma *impl; 202*82b77998SStan Tomov 203*82b77998SStan Tomov if (mtype != CEED_MEM_HOST) 204*82b77998SStan Tomov return CeedError(r->ceed, 1, "Only MemType = HOST supported"); 205*82b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 206*82b77998SStan Tomov switch (cmode) { 207*82b77998SStan Tomov case CEED_COPY_VALUES: 208*82b77998SStan Tomov ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated); 209*82b77998SStan Tomov CeedChk(ierr); 210*82b77998SStan Tomov memcpy(impl->indices_allocated, indices, 211*82b77998SStan Tomov r->nelem * r->elemsize * sizeof(indices[0])); 212*82b77998SStan Tomov impl->indices = impl->indices_allocated; 213*82b77998SStan Tomov break; 214*82b77998SStan Tomov case CEED_OWN_POINTER: 215*82b77998SStan Tomov impl->indices_allocated = (CeedInt *)indices; 216*82b77998SStan Tomov impl->indices = impl->indices_allocated; 217*82b77998SStan Tomov break; 218*82b77998SStan Tomov case CEED_USE_POINTER: 219*82b77998SStan Tomov impl->indices = indices; 220*82b77998SStan Tomov } 221*82b77998SStan Tomov r->data = impl; 222*82b77998SStan Tomov r->Apply = CeedElemRestrictionApply_Magma; 223*82b77998SStan Tomov r->Destroy = CeedElemRestrictionDestroy_Magma; 224*82b77998SStan Tomov return 0; 225*82b77998SStan Tomov } 226*82b77998SStan Tomov 227*82b77998SStan Tomov // Contracts on the middle index 228*82b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc 229*82b77998SStan Tomov // TRANSPOSE: V_ajc = T_bj U_abc 230*82b77998SStan Tomov // If Add != 0, "=" is replaced by "+=" 231*82b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed, 232*82b77998SStan Tomov CeedInt A, CeedInt B, CeedInt C, CeedInt J, 233*82b77998SStan Tomov const CeedScalar *t, CeedTransposeMode tmode, 234*82b77998SStan Tomov const CeedInt Add, 235*82b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 236*82b77998SStan Tomov CeedInt tstride0 = B, tstride1 = 1; 237*82b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 238*82b77998SStan Tomov tstride0 = 1; tstride1 = J; 239*82b77998SStan Tomov } 240*82b77998SStan Tomov 241*82b77998SStan Tomov for (CeedInt a=0; a<A; a++) { 242*82b77998SStan Tomov for (CeedInt j=0; j<J; j++) { 243*82b77998SStan Tomov if (!Add) { 244*82b77998SStan Tomov for (CeedInt c=0; c<C; c++) 245*82b77998SStan Tomov v[(a*J+j)*C+c] = 0; 246*82b77998SStan Tomov } 247*82b77998SStan Tomov for (CeedInt b=0; b<B; b++) { 248*82b77998SStan Tomov for (CeedInt c=0; c<C; c++) { 249*82b77998SStan Tomov v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 250*82b77998SStan Tomov } 251*82b77998SStan Tomov } 252*82b77998SStan Tomov } 253*82b77998SStan Tomov } 254*82b77998SStan Tomov return 0; 255*82b77998SStan Tomov } 256*82b77998SStan Tomov 257*82b77998SStan Tomov static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode, 258*82b77998SStan Tomov CeedEvalMode emode, 259*82b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 260*82b77998SStan Tomov int ierr; 261*82b77998SStan Tomov const CeedInt dim = basis->dim; 262*82b77998SStan Tomov const CeedInt ndof = basis->ndof; 263*82b77998SStan Tomov const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 264*82b77998SStan Tomov const CeedInt add = (tmode == CEED_TRANSPOSE); 265*82b77998SStan Tomov 266*82b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 267*82b77998SStan Tomov const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 268*82b77998SStan Tomov for (CeedInt i = 0; i < vsize; i++) 269*82b77998SStan Tomov v[i] = (CeedScalar) 0; 270*82b77998SStan Tomov } 271*82b77998SStan Tomov if (emode & CEED_EVAL_INTERP) { 272*82b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 273*82b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 274*82b77998SStan Tomov P = basis->Q1d; Q = basis->P1d; 275*82b77998SStan Tomov } 276*82b77998SStan Tomov CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 277*82b77998SStan Tomov CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 278*82b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 279*82b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 280*82b77998SStan Tomov tmode, add&&(d==dim-1), 281*82b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 282*82b77998SStan Tomov CeedChk(ierr); 283*82b77998SStan Tomov pre /= P; 284*82b77998SStan Tomov post *= Q; 285*82b77998SStan Tomov } 286*82b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 287*82b77998SStan Tomov v += nqpt; 288*82b77998SStan Tomov } else { 289*82b77998SStan Tomov u += nqpt; 290*82b77998SStan Tomov } 291*82b77998SStan Tomov } 292*82b77998SStan Tomov if (emode & CEED_EVAL_GRAD) { 293*82b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 294*82b77998SStan Tomov // In CEED_NOTRANSPOSE mode: 295*82b77998SStan Tomov // u is (P^dim x nc), column-major layout (nc = ndof) 296*82b77998SStan Tomov // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 297*82b77998SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 298*82b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 299*82b77998SStan Tomov P = basis->Q1d, Q = basis->P1d; 300*82b77998SStan Tomov } 301*82b77998SStan Tomov CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 302*82b77998SStan Tomov for (CeedInt p = 0; p < dim; p++) { 303*82b77998SStan Tomov CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 304*82b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 305*82b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 306*82b77998SStan Tomov (p==d)?basis->grad1d:basis->interp1d, 307*82b77998SStan Tomov tmode, add&&(d==dim-1), 308*82b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 309*82b77998SStan Tomov CeedChk(ierr); 310*82b77998SStan Tomov pre /= P; 311*82b77998SStan Tomov post *= Q; 312*82b77998SStan Tomov } 313*82b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 314*82b77998SStan Tomov v += nqpt; 315*82b77998SStan Tomov } else { 316*82b77998SStan Tomov u += nqpt; 317*82b77998SStan Tomov } 318*82b77998SStan Tomov } 319*82b77998SStan Tomov } 320*82b77998SStan Tomov if (emode & CEED_EVAL_WEIGHT) { 321*82b77998SStan Tomov if (tmode == CEED_TRANSPOSE) 322*82b77998SStan Tomov return CeedError(basis->ceed, 1, 323*82b77998SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 324*82b77998SStan Tomov CeedInt Q = basis->Q1d; 325*82b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 326*82b77998SStan Tomov CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 327*82b77998SStan Tomov for (CeedInt i=0; i<pre; i++) { 328*82b77998SStan Tomov for (CeedInt j=0; j<Q; j++) { 329*82b77998SStan Tomov for (CeedInt k=0; k<post; k++) { 330*82b77998SStan Tomov v[(i*Q + j)*post + k] = basis->qweight1d[j] 331*82b77998SStan Tomov * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 332*82b77998SStan Tomov } 333*82b77998SStan Tomov } 334*82b77998SStan Tomov } 335*82b77998SStan Tomov } 336*82b77998SStan Tomov } 337*82b77998SStan Tomov return 0; 338*82b77998SStan Tomov } 339*82b77998SStan Tomov 340*82b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) { 341*82b77998SStan Tomov return 0; 342*82b77998SStan Tomov } 343*82b77998SStan Tomov 344*82b77998SStan Tomov static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d, 345*82b77998SStan Tomov CeedInt Q1d, const CeedScalar *interp1d, 346*82b77998SStan Tomov const CeedScalar *grad1d, 347*82b77998SStan Tomov const CeedScalar *qref1d, 348*82b77998SStan Tomov const CeedScalar *qweight1d, 349*82b77998SStan Tomov CeedBasis basis) { 350*82b77998SStan Tomov basis->Apply = CeedBasisApply_Magma; 351*82b77998SStan Tomov basis->Destroy = CeedBasisDestroy_Magma; 352*82b77998SStan Tomov return 0; 353*82b77998SStan Tomov } 354*82b77998SStan Tomov 355*82b77998SStan Tomov static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q, 356*82b77998SStan Tomov const CeedScalar *const *u, 357*82b77998SStan Tomov CeedScalar *const *v) { 358*82b77998SStan Tomov int ierr; 359*82b77998SStan Tomov ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 360*82b77998SStan Tomov return 0; 361*82b77998SStan Tomov } 362*82b77998SStan Tomov 363*82b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 364*82b77998SStan Tomov return 0; 365*82b77998SStan Tomov } 366*82b77998SStan Tomov 367*82b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 368*82b77998SStan Tomov qf->Apply = CeedQFunctionApply_Magma; 369*82b77998SStan Tomov qf->Destroy = CeedQFunctionDestroy_Magma; 370*82b77998SStan Tomov return 0; 371*82b77998SStan Tomov } 372*82b77998SStan Tomov 373*82b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) { 374*82b77998SStan Tomov CeedOperator_Magma *impl = op->data; 375*82b77998SStan Tomov int ierr; 376*82b77998SStan Tomov 377*82b77998SStan Tomov ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 378*82b77998SStan Tomov ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 379*82b77998SStan Tomov ierr = CeedFree(&op->data); CeedChk(ierr); 380*82b77998SStan Tomov return 0; 381*82b77998SStan Tomov } 382*82b77998SStan Tomov 383*82b77998SStan Tomov static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata, 384*82b77998SStan Tomov CeedVector ustate, 385*82b77998SStan Tomov CeedVector residual, CeedRequest *request) { 386*82b77998SStan Tomov CeedOperator_Magma *impl = op->data; 387*82b77998SStan Tomov CeedVector etmp; 388*82b77998SStan Tomov CeedInt Q; 389*82b77998SStan Tomov const CeedInt nc = op->basis->ndof, dim = op->basis->dim; 390*82b77998SStan Tomov CeedScalar *Eu; 391*82b77998SStan Tomov char *qd; 392*82b77998SStan Tomov int ierr; 393*82b77998SStan Tomov CeedTransposeMode lmode = CEED_NOTRANSPOSE; 394*82b77998SStan Tomov 395*82b77998SStan Tomov if (!impl->etmp) { 396*82b77998SStan Tomov ierr = CeedVectorCreate(op->ceed, 397*82b77998SStan Tomov nc * op->Erestrict->nelem * op->Erestrict->elemsize, 398*82b77998SStan Tomov &impl->etmp); CeedChk(ierr); 399*82b77998SStan Tomov // etmp is allocated when CeedVectorGetArray is called below 400*82b77998SStan Tomov } 401*82b77998SStan Tomov etmp = impl->etmp; 402*82b77998SStan Tomov if (op->qf->inmode & ~CEED_EVAL_WEIGHT) { 403*82b77998SStan Tomov ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, 404*82b77998SStan Tomov nc, lmode, ustate, etmp, 405*82b77998SStan Tomov CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 406*82b77998SStan Tomov } 407*82b77998SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 408*82b77998SStan Tomov ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 409*82b77998SStan Tomov ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); 410*82b77998SStan Tomov CeedChk(ierr); 411*82b77998SStan Tomov for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 412*82b77998SStan Tomov CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0}; 413*82b77998SStan Tomov const CeedScalar *in[5] = {0,0,0,0,0}; 414*82b77998SStan Tomov // TODO: quadrature weights can be computed just once 415*82b77998SStan Tomov ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 416*82b77998SStan Tomov &Eu[e*op->Erestrict->elemsize*nc], BEu); 417*82b77998SStan Tomov CeedChk(ierr); 418*82b77998SStan Tomov CeedScalar *u_ptr = BEu, *v_ptr = BEv; 419*82b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; } 420*82b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; } 421*82b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; } 422*82b77998SStan Tomov if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; } 423*82b77998SStan Tomov if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; } 424*82b77998SStan Tomov ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 425*82b77998SStan Tomov CeedChk(ierr); 426*82b77998SStan Tomov ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 427*82b77998SStan Tomov &Eu[e*op->Erestrict->elemsize*nc]); 428*82b77998SStan Tomov CeedChk(ierr); 429*82b77998SStan Tomov } 430*82b77998SStan Tomov ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 431*82b77998SStan Tomov if (residual) { 432*82b77998SStan Tomov CeedScalar *res; 433*82b77998SStan Tomov CeedVectorGetArray(residual, CEED_MEM_HOST, &res); 434*82b77998SStan Tomov for (int i = 0; i < residual->length; i++) 435*82b77998SStan Tomov res[i] = (CeedScalar)0; 436*82b77998SStan Tomov ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, 437*82b77998SStan Tomov nc, lmode, etmp, residual, 438*82b77998SStan Tomov CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 439*82b77998SStan Tomov } 440*82b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 441*82b77998SStan Tomov *request = NULL; 442*82b77998SStan Tomov return 0; 443*82b77998SStan Tomov } 444*82b77998SStan Tomov 445*82b77998SStan Tomov static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) { 446*82b77998SStan Tomov CeedOperator_Magma *impl = op->data; 447*82b77998SStan Tomov int ierr; 448*82b77998SStan Tomov 449*82b77998SStan Tomov if (!impl->qdata) { 450*82b77998SStan Tomov CeedInt Q; 451*82b77998SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 452*82b77998SStan Tomov ierr = CeedVectorCreate(op->ceed, 453*82b77998SStan Tomov op->Erestrict->nelem * Q 454*82b77998SStan Tomov * op->qf->qdatasize / sizeof(CeedScalar), 455*82b77998SStan Tomov &impl->qdata); CeedChk(ierr); 456*82b77998SStan Tomov } 457*82b77998SStan Tomov *qdata = impl->qdata; 458*82b77998SStan Tomov return 0; 459*82b77998SStan Tomov } 460*82b77998SStan Tomov 461*82b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) { 462*82b77998SStan Tomov CeedOperator_Magma *impl; 463*82b77998SStan Tomov int ierr; 464*82b77998SStan Tomov 465*82b77998SStan Tomov ierr = CeedCalloc(1, &impl); CeedChk(ierr); 466*82b77998SStan Tomov op->data = impl; 467*82b77998SStan Tomov op->Destroy = CeedOperatorDestroy_Magma; 468*82b77998SStan Tomov op->Apply = CeedOperatorApply_Magma; 469*82b77998SStan Tomov op->GetQData = CeedOperatorGetQData_Magma; 470*82b77998SStan Tomov return 0; 471*82b77998SStan Tomov } 472*82b77998SStan Tomov 473*82b77998SStan Tomov // ***************************************************************************** 474*82b77998SStan Tomov // * INIT 475*82b77998SStan Tomov // ***************************************************************************** 476*82b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) { 477*82b77998SStan Tomov if (strcmp(resource, "/cpu/magma") 478*82b77998SStan Tomov && strcmp(resource, "/gpu/magma")) 479*82b77998SStan Tomov return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 480*82b77998SStan Tomov ceed->VecCreate = CeedVectorCreate_Magma; 481*82b77998SStan Tomov ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 482*82b77998SStan Tomov ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 483*82b77998SStan Tomov ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 484*82b77998SStan Tomov ceed->OperatorCreate = CeedOperatorCreate_Magma; 485*82b77998SStan Tomov return 0; 486*82b77998SStan Tomov } 487*82b77998SStan Tomov 488*82b77998SStan Tomov // ***************************************************************************** 489*82b77998SStan Tomov // * REGISTER 490*82b77998SStan Tomov // ***************************************************************************** 491*82b77998SStan Tomov __attribute__((constructor)) 492*82b77998SStan Tomov static void Register(void) { 493*82b77998SStan Tomov CeedRegister("/cpu/magma", CeedInit_Magma); 494*82b77998SStan Tomov CeedRegister("/gpu/magma", CeedInit_Magma); 495*82b77998SStan Tomov } 496