182b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 282b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 382b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details. 482b77998SStan Tomov // 582b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software 682b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral 782b77998SStan Tomov // element discretizations for exascale applications. For more information and 882b77998SStan Tomov // source code availability see http://github.com/ceed. 982b77998SStan Tomov // 1082b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1182b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office 1282b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for 1382b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including 1482b77998SStan Tomov // software, applications, hardware, advanced system engineering and early 1582b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative. 1682b77998SStan Tomov 1738612b08SStan Tomov #include "ceed-magma.h" 185a9ca9adSVeselin Dobrev #include <string.h> 1982b77998SStan Tomov 2082b77998SStan Tomov typedef struct { 2182b77998SStan Tomov CeedScalar *array; 2293fbe329SStan Tomov CeedScalar *darray; 23e2900954SStan Tomov int own_; 24c119dad6SStan Tomov int down_; 2582b77998SStan Tomov } CeedVector_Magma; 2682b77998SStan Tomov 2782b77998SStan Tomov typedef struct { 281751b0a9SStan Tomov CeedInt *indices; 291751b0a9SStan Tomov CeedInt *dindices; 301751b0a9SStan Tomov int own_; 31c119dad6SStan Tomov int down_; // cover a case where we own Device memory 3282b77998SStan Tomov } CeedElemRestriction_Magma; 3382b77998SStan Tomov 3482b77998SStan Tomov typedef struct { 353bfcb0deSjeremylt CeedVector 363bfcb0deSjeremylt *evecs; /// E-vectors needed to apply operator (input followed by outputs) 373bfcb0deSjeremylt CeedScalar **edata; 383bfcb0deSjeremylt CeedScalar **qdata; /// Inputs followed by outputs 393bfcb0deSjeremylt CeedScalar 403bfcb0deSjeremylt **qdata_alloc; /// Allocated quadrature data arrays (to be freed by us) 413bfcb0deSjeremylt CeedScalar **indata; 423bfcb0deSjeremylt CeedScalar **outdata; 433bfcb0deSjeremylt CeedInt numein; 443bfcb0deSjeremylt CeedInt numeout; 453bfcb0deSjeremylt CeedInt numqin; 463bfcb0deSjeremylt CeedInt numqout; 4782b77998SStan Tomov } CeedOperator_Magma; 4882b77998SStan Tomov 4993fbe329SStan Tomov // ***************************************************************************** 5093fbe329SStan Tomov // * Initialize vector vec (after free mem) with values from array based on cmode 5193fbe329SStan Tomov // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 5293fbe329SStan Tomov // * to array, and data is copied (not store passed pointer) 5393fbe329SStan Tomov // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 5493fbe329SStan Tomov // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 5593fbe329SStan Tomov // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 5693fbe329SStan Tomov // ***************************************************************************** 5782b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 5882b77998SStan Tomov CeedCopyMode cmode, CeedScalar *array) { 5982b77998SStan Tomov CeedVector_Magma *impl = vec->data; 6082b77998SStan Tomov int ierr; 6182b77998SStan Tomov 62c119dad6SStan Tomov // If own data, free the "old" data, e.g., as it may be of different size 63e2900954SStan Tomov if (impl->own_) { 64e2900954SStan Tomov magma_free( impl->darray ); 65e2900954SStan Tomov magma_free_pinned( impl->array ); 66e2900954SStan Tomov impl->darray = NULL; 67e2900954SStan Tomov impl->array = NULL; 68e2900954SStan Tomov impl->own_ = 0; 69c119dad6SStan Tomov impl->down_= 0; 70e2900954SStan Tomov } 7193fbe329SStan Tomov 72e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 73e2900954SStan Tomov // memory is on the host; own_ = 0 7482b77998SStan Tomov switch (cmode) { 7582b77998SStan Tomov case CEED_COPY_VALUES: 7693fbe329SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 77e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 78e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 79e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 80e2900954SStan Tomov impl->own_ = 1; 81e2900954SStan Tomov 82c119dad6SStan Tomov if (array != NULL) 8393fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 8493fbe329SStan Tomov array, 1, impl->darray, 1); 8582b77998SStan Tomov break; 8682b77998SStan Tomov case CEED_OWN_POINTER: 8793fbe329SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 88e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 891751b0a9SStan Tomov // TODO: possible problem here is if we are passed non-pinned memory; 901751b0a9SStan Tomov // (as we own it, lter in destroy, we use free for pinned memory). 91e2900954SStan Tomov impl->array = array; 92e2900954SStan Tomov impl->own_ = 1; 93e2900954SStan Tomov 94c119dad6SStan Tomov if (array != NULL) 9593fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 9693fbe329SStan Tomov array, 1, impl->darray, 1); 9782b77998SStan Tomov break; 9882b77998SStan Tomov case CEED_USE_POINTER: 99c119dad6SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 100c119dad6SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 101c119dad6SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 102c119dad6SStan Tomov array, 1, impl->darray, 1); 10306b636dbSStan Tomov 10406b636dbSStan Tomov impl->down_ = 1; 10582b77998SStan Tomov impl->array = array; 10682b77998SStan Tomov } 107e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 108e2900954SStan Tomov // memory is on the device; own = 0 109e2900954SStan Tomov switch (cmode) { 110e2900954SStan Tomov case CEED_COPY_VALUES: 111e2900954SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 112e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 113e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 114e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 115e2900954SStan Tomov impl->own_ = 1; 116e2900954SStan Tomov 117e2900954SStan Tomov if (array) 118e2900954SStan Tomov magma_copyvector(vec->length, sizeof(array[0]), 119e2900954SStan Tomov array, 1, impl->darray, 1); 12006b636dbSStan Tomov else 12106b636dbSStan Tomov // t30 assumes allocation initializes with 0s 12206b636dbSStan Tomov magma_setvector(vec->length, sizeof(array[0]), 12306b636dbSStan Tomov impl->array, 1, impl->darray, 1); 124e2900954SStan Tomov break; 125e2900954SStan Tomov case CEED_OWN_POINTER: 126e2900954SStan Tomov impl->darray = array; 127e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 128e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 129e2900954SStan Tomov impl->own_ = 1; 130e2900954SStan Tomov 131e2900954SStan Tomov break; 132e2900954SStan Tomov case CEED_USE_POINTER: 133e2900954SStan Tomov impl->darray = array; 134e2900954SStan Tomov impl->array = NULL; 135e2900954SStan Tomov } 136e2900954SStan Tomov 137e2900954SStan Tomov } else 138e2900954SStan Tomov return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 139e2900954SStan Tomov 14082b77998SStan Tomov return 0; 14182b77998SStan Tomov } 14282b77998SStan Tomov 14393fbe329SStan Tomov // ***************************************************************************** 144e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) 14593fbe329SStan Tomov // ***************************************************************************** 14682b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 14782b77998SStan Tomov CeedScalar **array) { 14882b77998SStan Tomov CeedVector_Magma *impl = vec->data; 14982b77998SStan Tomov int ierr; 15082b77998SStan Tomov 151e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 152c119dad6SStan Tomov if (impl->own_) { 153e2900954SStan Tomov // data is owned so GPU had the most up-to-date version; copy it 15406b636dbSStan Tomov // TTT - apparantly it doesn't have most up to date data 155e2900954SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 156e2900954SStan Tomov impl->darray, 1, impl->array, 1); 15706b636dbSStan Tomov CeedDebug("\033[31m[CeedVectorGetArray_Magma]"); 15806b636dbSStan Tomov //fprintf(stderr,"rrrrrrrrrrrrrrr\n"); 159c119dad6SStan Tomov } else if (impl->array == NULL) { 160c119dad6SStan Tomov // Vector doesn't own the data and was set on GPU 161c119dad6SStan Tomov if (impl->darray == NULL) { 162c119dad6SStan Tomov // call was made just to allocate memory 163c119dad6SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 164c119dad6SStan Tomov CeedChk(ierr); 165c119dad6SStan Tomov } else 166c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 16782b77998SStan Tomov } 16882b77998SStan Tomov *array = impl->array; 169e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 170c119dad6SStan Tomov if (impl->darray == NULL) { 171c119dad6SStan Tomov // Vector doesn't own the data and was set on the CPU 172c119dad6SStan Tomov if (impl->array == NULL) { 173c119dad6SStan Tomov // call was made just to allocate memory 174e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 175e2900954SStan Tomov CeedChk(ierr); 176c119dad6SStan Tomov } else 177c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 178e2900954SStan Tomov } 179e2900954SStan Tomov *array = impl->darray; 180e2900954SStan Tomov } else 181e2900954SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 182e2900954SStan Tomov 18382b77998SStan Tomov return 0; 18482b77998SStan Tomov } 18582b77998SStan Tomov 18693fbe329SStan Tomov // ***************************************************************************** 187e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 18893fbe329SStan Tomov // ***************************************************************************** 18982b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 19082b77998SStan Tomov const CeedScalar **array) { 19182b77998SStan Tomov CeedVector_Magma *impl = vec->data; 19282b77998SStan Tomov int ierr; 19382b77998SStan Tomov 194e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 195c119dad6SStan Tomov if (impl->own_) { 196e2900954SStan Tomov // data is owned so GPU had the most up-to-date version; copy it 197e2900954SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 198e2900954SStan Tomov impl->darray, 1, impl->array, 1); 199c119dad6SStan Tomov } else if (impl->array == NULL) { 200c119dad6SStan Tomov // Vector doesn't own the data and was set on GPU 201c119dad6SStan Tomov if (impl->darray == NULL) { 202c119dad6SStan Tomov // call was made just to allocate memory 203c119dad6SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 204c119dad6SStan Tomov CeedChk(ierr); 205c119dad6SStan Tomov } else 206c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 20782b77998SStan Tomov } 20882b77998SStan Tomov *array = impl->array; 209e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 210c119dad6SStan Tomov if (impl->darray == NULL) { 211c119dad6SStan Tomov // Vector doesn't own the data and was set on the CPU 212c119dad6SStan Tomov if (impl->array == NULL) { 213c119dad6SStan Tomov // call was made just to allocate memory 214e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 215e2900954SStan Tomov CeedChk(ierr); 216c119dad6SStan Tomov } else 217c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 218e2900954SStan Tomov } 219e2900954SStan Tomov *array = impl->darray; 220e2900954SStan Tomov } else 221e2900954SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 222e2900954SStan Tomov 22382b77998SStan Tomov return 0; 22482b77998SStan Tomov } 22582b77998SStan Tomov 22693fbe329SStan Tomov // ***************************************************************************** 227e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST 228e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if 229e2900954SStan Tomov // * it was GPU memory we would not call this routine at all. 23093fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 23193fbe329SStan Tomov // * from vec and possibly modified them. 23293fbe329SStan Tomov // ***************************************************************************** 23382b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 23493fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 23593fbe329SStan Tomov 236c119dad6SStan Tomov // Check if the array is a CPU pointer 237c119dad6SStan Tomov if (*array == impl->array) { 238c119dad6SStan Tomov // Update device, if the device pointer is not NULL 239c119dad6SStan Tomov if (impl->darray != NULL) { 24093fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 24193fbe329SStan Tomov *array, 1, impl->darray, 1); 2425a9ca9adSVeselin Dobrev } else { 2435a9ca9adSVeselin Dobrev // nothing to do (case of CPU use pointer) 2445a9ca9adSVeselin Dobrev } 245c119dad6SStan Tomov 246c119dad6SStan Tomov } else if (impl->down_) { 247c119dad6SStan Tomov // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 248c119dad6SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 249c119dad6SStan Tomov impl->darray, 1, impl->array, 1); 250c119dad6SStan Tomov } 251e2900954SStan Tomov 25282b77998SStan Tomov *array = NULL; 25382b77998SStan Tomov return 0; 25482b77998SStan Tomov } 25582b77998SStan Tomov 25693fbe329SStan Tomov // ***************************************************************************** 257e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST 258e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if 259e2900954SStan Tomov // * it was GPU memory we would not call this routine at all. 26093fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 26193fbe329SStan Tomov // * from vec to only read them; in this case vec may have been modified meanwhile 26293fbe329SStan Tomov // * and needs to be restored here. 26393fbe329SStan Tomov // ***************************************************************************** 26482b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 26582b77998SStan Tomov const CeedScalar **array) { 26693fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 26793fbe329SStan Tomov 268c119dad6SStan Tomov // Check if the array is a CPU pointer 269c119dad6SStan Tomov if (*array == impl->array) { 270c119dad6SStan Tomov // Update device, if the device pointer is not NULL 2715a9ca9adSVeselin Dobrev if (impl->darray != NULL) { 27293fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 27393fbe329SStan Tomov *array, 1, impl->darray, 1); 2745a9ca9adSVeselin Dobrev } else { 2755a9ca9adSVeselin Dobrev // nothing to do (case of CPU use pointer) 2765a9ca9adSVeselin Dobrev } 277c119dad6SStan Tomov 278c119dad6SStan Tomov } else if (impl->down_) { 279c119dad6SStan Tomov // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 280c119dad6SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 281c119dad6SStan Tomov impl->darray, 1, impl->array, 1); 282c119dad6SStan Tomov } 283e2900954SStan Tomov 28482b77998SStan Tomov *array = NULL; 28582b77998SStan Tomov return 0; 28682b77998SStan Tomov } 28782b77998SStan Tomov 28882b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) { 28982b77998SStan Tomov CeedVector_Magma *impl = vec->data; 29082b77998SStan Tomov int ierr; 29182b77998SStan Tomov 292e2900954SStan Tomov // Free if we own the data 293e2900954SStan Tomov if (impl->own_) { 294e2900954SStan Tomov ierr = magma_free_pinned(impl->array); CeedChk(ierr); 29593fbe329SStan Tomov ierr = magma_free(impl->darray); CeedChk(ierr); 296c119dad6SStan Tomov } else if (impl->down_) { 297c119dad6SStan Tomov ierr = magma_free(impl->darray); CeedChk(ierr); 298e2900954SStan Tomov } 29982b77998SStan Tomov ierr = CeedFree(&vec->data); CeedChk(ierr); 30082b77998SStan Tomov return 0; 30182b77998SStan Tomov } 30282b77998SStan Tomov 30393fbe329SStan Tomov // ***************************************************************************** 30493fbe329SStan Tomov // * Create vector vec of size n 30593fbe329SStan Tomov // ***************************************************************************** 30682b77998SStan Tomov static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) { 30782b77998SStan Tomov CeedVector_Magma *impl; 30882b77998SStan Tomov int ierr; 30982b77998SStan Tomov 31082b77998SStan Tomov vec->SetArray = CeedVectorSetArray_Magma; 31182b77998SStan Tomov vec->GetArray = CeedVectorGetArray_Magma; 31282b77998SStan Tomov vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 31382b77998SStan Tomov vec->RestoreArray = CeedVectorRestoreArray_Magma; 31482b77998SStan Tomov vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 31582b77998SStan Tomov vec->Destroy = CeedVectorDestroy_Magma; 31682b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 31793fbe329SStan Tomov impl->darray = NULL; 318e2900954SStan Tomov impl->array = NULL; 319e2900954SStan Tomov impl->own_ = 0; 320c119dad6SStan Tomov impl->down_= 0; 32182b77998SStan Tomov vec->data = impl; 32282b77998SStan Tomov return 0; 32382b77998SStan Tomov } 32482b77998SStan Tomov 3253bfcb0deSjeremylt 32693fbe329SStan Tomov // ***************************************************************************** 32793fbe329SStan Tomov // * Apply restriction operator r to u: v = r(rmode) u 32893fbe329SStan Tomov // ***************************************************************************** 32982b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 3303bfcb0deSjeremylt CeedTransposeMode tmode, 33182b77998SStan Tomov CeedTransposeMode lmode, CeedVector u, 33282b77998SStan Tomov CeedVector v, CeedRequest *request) { 33382b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 33482b77998SStan Tomov int ierr; 33582b77998SStan Tomov const CeedScalar *uu; 33682b77998SStan Tomov CeedScalar *vv; 3373bfcb0deSjeremylt CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof, 3383bfcb0deSjeremylt ncomp=r->ncomp; 339c119dad6SStan Tomov CeedInt esize = nelem * elemsize; 34006b636dbSStan Tomov 34106b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 342137a0714SStan Tomov CeedInt *dindices = impl->dindices; 343c119dad6SStan Tomov // Get pointers on the device 344c119dad6SStan Tomov ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr); 345c119dad6SStan Tomov ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr); 34606b636dbSStan Tomov #else 34706b636dbSStan Tomov CeedInt *indices = impl->indices; 34806b636dbSStan Tomov ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 34906b636dbSStan Tomov ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 35006b636dbSStan Tomov #endif 351c119dad6SStan Tomov 35282b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 35382b77998SStan Tomov // Perform: v = r * u 35482b77998SStan Tomov if (ncomp == 1) { 35506b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 356c119dad6SStan Tomov magma_template<<i=0:esize>> 357c119dad6SStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 358c119dad6SStan Tomov vv[i] = uu[dindices[i]]; 359e2900954SStan Tomov } 36006b636dbSStan Tomov #else 36106b636dbSStan Tomov for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]]; 36206b636dbSStan Tomov #endif 36382b77998SStan Tomov } else { 36482b77998SStan Tomov // vv is (elemsize x ncomp x nelem), column-major 36582b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 36606b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 36706b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 36806b636dbSStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) { 36906b636dbSStan Tomov vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d]; 37006b636dbSStan Tomov } 37106b636dbSStan Tomov #else 372c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 37382b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 374c119dad6SStan Tomov for (CeedInt i=0; i < elemsize; i++) { 375c119dad6SStan Tomov vv[i + elemsize*(d+ncomp*e)] = 376c119dad6SStan Tomov uu[indices[i+elemsize*e]+ndof*d]; 377c119dad6SStan Tomov } 37806b636dbSStan Tomov #endif 37982b77998SStan Tomov } else { // u is (ncomp x ndof), column-major 38006b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 38106b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 38206b636dbSStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 38306b636dbSStan Tomov vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]]; 38406b636dbSStan Tomov } 38506b636dbSStan Tomov #else 386c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 38782b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 388c119dad6SStan Tomov for (CeedInt i=0; i< elemsize; i++) { 389c119dad6SStan Tomov vv[i + elemsize*(d+ncomp*e)] = 390c119dad6SStan Tomov uu[d+ncomp*indices[i+elemsize*e]]; 391c119dad6SStan Tomov } 39206b636dbSStan Tomov #endif 39382b77998SStan Tomov } 39482b77998SStan Tomov } 39582b77998SStan Tomov } else { 39682b77998SStan Tomov // Note: in transpose mode, we perform: v += r^t * u 39782b77998SStan Tomov if (ncomp == 1) { 398c119dad6SStan Tomov // fprintf(stderr,"3 ---------\n"); 39906b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 400c119dad6SStan Tomov magma_template<<i=0:esize>> 401c119dad6SStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 402c119dad6SStan Tomov magmablas_datomic_add( &vv[dindices[i]], uu[i]); 403c119dad6SStan Tomov } 40406b636dbSStan Tomov #else 40506b636dbSStan Tomov for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i]; 40606b636dbSStan Tomov #endif 407c119dad6SStan Tomov } else { // u is (elemsize x ncomp x nelem) 408c119dad6SStan Tomov fprintf(stderr,"2 ---------\n"); 409c119dad6SStan Tomov 41082b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 41106b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 41206b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 41306b636dbSStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) { 41406b636dbSStan Tomov magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d], 41506b636dbSStan Tomov uu[i+iend*(d+e*dend)]); 41606b636dbSStan Tomov } 41706b636dbSStan Tomov #else 418c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 41982b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 420c119dad6SStan Tomov for (CeedInt i=0; i < elemsize; i++) { 421c119dad6SStan Tomov vv[indices[i + elemsize*e]+ndof*d] += 422c119dad6SStan Tomov uu[i + elemsize*(d+e*ncomp)]; 423c119dad6SStan Tomov } 42406b636dbSStan Tomov #endif 42506b636dbSStan Tomov } else { // vv is (ncomp x ndof), column-major 42606b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 427c119dad6SStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 42806b636dbSStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 42906b636dbSStan Tomov magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]], 430c119dad6SStan Tomov uu[i+iend*(d+e*dend)]); 43182b77998SStan Tomov } 43206b636dbSStan Tomov #else 433c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 43482b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 435c119dad6SStan Tomov for (CeedInt i=0; i < elemsize; i++) { 436c119dad6SStan Tomov vv[d+ncomp*indices[i + elemsize*e]] += 437c119dad6SStan Tomov uu[i + elemsize*(d+e*ncomp)]; 438c119dad6SStan Tomov } 43906b636dbSStan Tomov #endif 44082b77998SStan Tomov } 44182b77998SStan Tomov } 44282b77998SStan Tomov } 44306b636dbSStan Tomov 44482b77998SStan Tomov ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 44582b77998SStan Tomov ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 446c119dad6SStan Tomov 44782b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 44882b77998SStan Tomov *request = NULL; 44982b77998SStan Tomov return 0; 45082b77998SStan Tomov } 45182b77998SStan Tomov 45282b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 45382b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 45482b77998SStan Tomov int ierr; 45582b77998SStan Tomov 4561751b0a9SStan Tomov // Free if we own the data 4571751b0a9SStan Tomov if (impl->own_) { 4581751b0a9SStan Tomov ierr = magma_free_pinned(impl->indices); CeedChk(ierr); 4591751b0a9SStan Tomov ierr = magma_free(impl->dindices); CeedChk(ierr); 4601981b6e4STzanio } else if (impl->down_) { 461c119dad6SStan Tomov ierr = magma_free(impl->dindices); CeedChk(ierr); 462c119dad6SStan Tomov } 46382b77998SStan Tomov ierr = CeedFree(&r->data); CeedChk(ierr); 46482b77998SStan Tomov return 0; 46582b77998SStan Tomov } 46682b77998SStan Tomov 46782b77998SStan Tomov static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r, 46882b77998SStan Tomov CeedMemType mtype, 46993fbe329SStan Tomov CeedCopyMode cmode, 47093fbe329SStan Tomov const CeedInt *indices) { 4711751b0a9SStan Tomov int ierr, size = r->nelem*r->elemsize; 47282b77998SStan Tomov CeedElemRestriction_Magma *impl; 47382b77998SStan Tomov 4741751b0a9SStan Tomov // Allocate memory for the MAGMA Restricton and initializa pointers to NULL 47582b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 4761751b0a9SStan Tomov impl->dindices = NULL; 4771751b0a9SStan Tomov impl->indices = NULL; 4781751b0a9SStan Tomov impl->own_ = 0; 479c119dad6SStan Tomov impl->down_= 0; 4801751b0a9SStan Tomov 4811751b0a9SStan Tomov if (mtype == CEED_MEM_HOST) { 4821751b0a9SStan Tomov // memory is on the host; own_ = 0 48382b77998SStan Tomov switch (cmode) { 48482b77998SStan Tomov case CEED_COPY_VALUES: 4851751b0a9SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 4861751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 4871751b0a9SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->indices, 4881751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 4891751b0a9SStan Tomov impl->own_ = 1; 4901751b0a9SStan Tomov 49106b636dbSStan Tomov if (indices != NULL) { 49206b636dbSStan Tomov memcpy(impl->indices, indices, size * sizeof(indices[0])); 4931751b0a9SStan Tomov magma_setvector(size, sizeof(CeedInt), 49406b636dbSStan Tomov impl->indices, 1, impl->dindices, 1); 49506b636dbSStan Tomov } 49682b77998SStan Tomov break; 49782b77998SStan Tomov case CEED_OWN_POINTER: 4981751b0a9SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 4991751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 5001751b0a9SStan Tomov // TODO: possible problem here is if we are passed non-pinned memory; 5011751b0a9SStan Tomov // (as we own it, lter in destroy, we use free for pinned memory). 502137a0714SStan Tomov impl->indices = (CeedInt *)indices; 5031751b0a9SStan Tomov impl->own_ = 1; 5041751b0a9SStan Tomov 505c119dad6SStan Tomov if (indices != NULL) 5061751b0a9SStan Tomov magma_setvector(size, sizeof(CeedInt), 5071751b0a9SStan Tomov indices, 1, impl->dindices, 1); 50882b77998SStan Tomov break; 50982b77998SStan Tomov case CEED_USE_POINTER: 510c119dad6SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 511c119dad6SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 512c119dad6SStan Tomov magma_setvector(size, sizeof(CeedInt), 513c119dad6SStan Tomov indices, 1, impl->dindices, 1); 514c119dad6SStan Tomov impl->down_ = 1; 515137a0714SStan Tomov impl->indices = (CeedInt *)indices; 51682b77998SStan Tomov } 5171751b0a9SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 5181751b0a9SStan Tomov // memory is on the device; own = 0 5191751b0a9SStan Tomov switch (cmode) { 5201751b0a9SStan Tomov case CEED_COPY_VALUES: 5211751b0a9SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 5221751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 5231751b0a9SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->indices, 5241751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 5251751b0a9SStan Tomov impl->own_ = 1; 5261751b0a9SStan Tomov 5271751b0a9SStan Tomov if (indices) 5281751b0a9SStan Tomov magma_copyvector(size, sizeof(CeedInt), 5291751b0a9SStan Tomov indices, 1, impl->dindices, 1); 5301751b0a9SStan Tomov break; 5311751b0a9SStan Tomov case CEED_OWN_POINTER: 532137a0714SStan Tomov impl->dindices = (CeedInt *)indices; 5331751b0a9SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->indices, 5341751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 5351751b0a9SStan Tomov impl->own_ = 1; 5361751b0a9SStan Tomov 5371751b0a9SStan Tomov break; 5381751b0a9SStan Tomov case CEED_USE_POINTER: 539137a0714SStan Tomov impl->dindices = (CeedInt *)indices; 5401751b0a9SStan Tomov impl->indices = NULL; 5411751b0a9SStan Tomov } 5421751b0a9SStan Tomov 5431751b0a9SStan Tomov } else 5441751b0a9SStan Tomov return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported"); 5451751b0a9SStan Tomov 54682b77998SStan Tomov r->data = impl; 54782b77998SStan Tomov r->Apply = CeedElemRestrictionApply_Magma; 54882b77998SStan Tomov r->Destroy = CeedElemRestrictionDestroy_Magma; 5491751b0a9SStan Tomov 55082b77998SStan Tomov return 0; 55182b77998SStan Tomov } 55282b77998SStan Tomov 55382b77998SStan Tomov // Contracts on the middle index 55482b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc 55582b77998SStan Tomov // TRANSPOSE: V_ajc = T_bj U_abc 55682b77998SStan Tomov // If Add != 0, "=" is replaced by "+=" 55782b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed, 55882b77998SStan Tomov CeedInt A, CeedInt B, CeedInt C, CeedInt J, 55982b77998SStan Tomov const CeedScalar *t, CeedTransposeMode tmode, 56082b77998SStan Tomov const CeedInt Add, 56182b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 56238612b08SStan Tomov #ifdef USE_MAGMA_BATCH 56338612b08SStan Tomov magma_dtensor_contract(ceed, A, B, C, J, t, tmode, Add, u, v); 56438612b08SStan Tomov #else 56582b77998SStan Tomov CeedInt tstride0 = B, tstride1 = 1; 56682b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 56782b77998SStan Tomov tstride0 = 1; tstride1 = J; 56882b77998SStan Tomov } 56938612b08SStan Tomov CeedDebug("\033[31m[CeedTensorContract] A=%d, J=%d, C=%d, B=%d: %d %d %d", 57038612b08SStan Tomov A,J,C,B,A*J*B*C, C*J*A, C*B*A); 57182b77998SStan Tomov for (CeedInt a=0; a<A; a++) { 57282b77998SStan Tomov for (CeedInt j=0; j<J; j++) { 57382b77998SStan Tomov if (!Add) { 57482b77998SStan Tomov for (CeedInt c=0; c<C; c++) 57582b77998SStan Tomov v[(a*J+j)*C+c] = 0; 57682b77998SStan Tomov } 57782b77998SStan Tomov for (CeedInt b=0; b<B; b++) { 57882b77998SStan Tomov for (CeedInt c=0; c<C; c++) { 57982b77998SStan Tomov v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 58082b77998SStan Tomov } 58182b77998SStan Tomov } 58282b77998SStan Tomov } 58382b77998SStan Tomov } 58438612b08SStan Tomov #endif 58582b77998SStan Tomov return 0; 58682b77998SStan Tomov } 58782b77998SStan Tomov 58882b77998SStan Tomov static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode, 58982b77998SStan Tomov CeedEvalMode emode, 59082b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 59182b77998SStan Tomov int ierr; 59282b77998SStan Tomov const CeedInt dim = basis->dim; 5930f5de9e9Sjeremylt const CeedInt ncomp = basis->ncomp; 5940f5de9e9Sjeremylt const CeedInt nqpt = ncomp*CeedPowInt(basis->Q1d, dim); 59582b77998SStan Tomov const CeedInt add = (tmode == CEED_TRANSPOSE); 59682b77998SStan Tomov 59738612b08SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d", 5980f5de9e9Sjeremylt ncomp*CeedPowInt(basis->P1d, dim)); 59938612b08SStan Tomov 60082b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 6010f5de9e9Sjeremylt const CeedInt vsize = ncomp*CeedPowInt(basis->P1d, dim); 60282b77998SStan Tomov for (CeedInt i = 0; i < vsize; i++) 60382b77998SStan Tomov v[i] = (CeedScalar) 0; 60482b77998SStan Tomov } 60582b77998SStan Tomov if (emode & CEED_EVAL_INTERP) { 60682b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 60782b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 60882b77998SStan Tomov P = basis->Q1d; Q = basis->P1d; 60982b77998SStan Tomov } 6100f5de9e9Sjeremylt CeedInt pre = ncomp*CeedPowInt(P, dim-1), post = 1; 6110f5de9e9Sjeremylt CeedScalar tmp[2][ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 61238612b08SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 6130f5de9e9Sjeremylt ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)); 61482b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 61582b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 61682b77998SStan Tomov tmode, add&&(d==dim-1), 61782b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 61882b77998SStan Tomov CeedChk(ierr); 61982b77998SStan Tomov pre /= P; 62082b77998SStan Tomov post *= Q; 62182b77998SStan Tomov } 62282b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 62382b77998SStan Tomov v += nqpt; 62482b77998SStan Tomov } else { 62582b77998SStan Tomov u += nqpt; 62682b77998SStan Tomov } 62782b77998SStan Tomov } 62882b77998SStan Tomov if (emode & CEED_EVAL_GRAD) { 62982b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 63082b77998SStan Tomov // In CEED_NOTRANSPOSE mode: 6310f5de9e9Sjeremylt // u is (P^dim x nc), column-major layout (nc = ncomp) 6320f5de9e9Sjeremylt // v is (Q^dim x nc x dim), column-major layout (nc = ncomp) 63382b77998SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 63482b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 63582b77998SStan Tomov P = basis->Q1d, Q = basis->P1d; 63682b77998SStan Tomov } 6370f5de9e9Sjeremylt CeedScalar tmp[2][ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 63838612b08SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 6390f5de9e9Sjeremylt ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)); 64082b77998SStan Tomov for (CeedInt p = 0; p < dim; p++) { 6410f5de9e9Sjeremylt CeedInt pre = ncomp*CeedPowInt(P, dim-1), post = 1; 64282b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 64382b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 64482b77998SStan Tomov (p==d)?basis->grad1d:basis->interp1d, 64582b77998SStan Tomov tmode, add&&(d==dim-1), 64682b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 64782b77998SStan Tomov CeedChk(ierr); 64882b77998SStan Tomov pre /= P; 64982b77998SStan Tomov post *= Q; 65082b77998SStan Tomov } 65182b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 65282b77998SStan Tomov v += nqpt; 65382b77998SStan Tomov } else { 65482b77998SStan Tomov u += nqpt; 65582b77998SStan Tomov } 65682b77998SStan Tomov } 65782b77998SStan Tomov } 65882b77998SStan Tomov if (emode & CEED_EVAL_WEIGHT) { 65982b77998SStan Tomov if (tmode == CEED_TRANSPOSE) 66082b77998SStan Tomov return CeedError(basis->ceed, 1, 66182b77998SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 66282b77998SStan Tomov CeedInt Q = basis->Q1d; 66382b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 66482b77998SStan Tomov CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 66582b77998SStan Tomov for (CeedInt i=0; i<pre; i++) { 66682b77998SStan Tomov for (CeedInt j=0; j<Q; j++) { 66782b77998SStan Tomov for (CeedInt k=0; k<post; k++) { 66882b77998SStan Tomov v[(i*Q + j)*post + k] = basis->qweight1d[j] 66982b77998SStan Tomov * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 67082b77998SStan Tomov } 67182b77998SStan Tomov } 67282b77998SStan Tomov } 67382b77998SStan Tomov } 67482b77998SStan Tomov } 67582b77998SStan Tomov return 0; 67682b77998SStan Tomov } 67782b77998SStan Tomov 67882b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) { 67982b77998SStan Tomov return 0; 68082b77998SStan Tomov } 68182b77998SStan Tomov 68282b77998SStan Tomov static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d, 68382b77998SStan Tomov CeedInt Q1d, const CeedScalar *interp1d, 68482b77998SStan Tomov const CeedScalar *grad1d, 68582b77998SStan Tomov const CeedScalar *qref1d, 68682b77998SStan Tomov const CeedScalar *qweight1d, 68782b77998SStan Tomov CeedBasis basis) { 68882b77998SStan Tomov basis->Apply = CeedBasisApply_Magma; 68982b77998SStan Tomov basis->Destroy = CeedBasisDestroy_Magma; 69082b77998SStan Tomov return 0; 69182b77998SStan Tomov } 69282b77998SStan Tomov 6933bfcb0deSjeremylt static int CeedQFunctionApply_Magma(CeedQFunction qf, CeedInt Q, 69482b77998SStan Tomov const CeedScalar *const *u, 69582b77998SStan Tomov CeedScalar *const *v) { 69682b77998SStan Tomov int ierr; 6973bfcb0deSjeremylt ierr = qf->function(qf->ctx, Q, u, v); CeedChk(ierr); 69882b77998SStan Tomov return 0; 69982b77998SStan Tomov } 70082b77998SStan Tomov 70182b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 70282b77998SStan Tomov return 0; 70382b77998SStan Tomov } 70482b77998SStan Tomov 70582b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 70682b77998SStan Tomov qf->Apply = CeedQFunctionApply_Magma; 70782b77998SStan Tomov qf->Destroy = CeedQFunctionDestroy_Magma; 70882b77998SStan Tomov return 0; 70982b77998SStan Tomov } 71082b77998SStan Tomov 71182b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) { 71282b77998SStan Tomov CeedOperator_Magma *impl = op->data; 71382b77998SStan Tomov int ierr; 71482b77998SStan Tomov 7153bfcb0deSjeremylt for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 7163bfcb0deSjeremylt ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 7173bfcb0deSjeremylt } 7183bfcb0deSjeremylt ierr = CeedFree(&impl->evecs); CeedChk(ierr); 7193bfcb0deSjeremylt ierr = CeedFree(&impl->edata); CeedChk(ierr); 7203bfcb0deSjeremylt 7213bfcb0deSjeremylt for (CeedInt i=0; i<impl->numqin+impl->numqout; i++) { 7223bfcb0deSjeremylt ierr = CeedFree(&impl->qdata_alloc[i]); CeedChk(ierr); 7233bfcb0deSjeremylt } 7243bfcb0deSjeremylt ierr = CeedFree(&impl->qdata_alloc); CeedChk(ierr); 7253bfcb0deSjeremylt ierr = CeedFree(&impl->qdata); CeedChk(ierr); 7263bfcb0deSjeremylt 7273bfcb0deSjeremylt ierr = CeedFree(&impl->indata); CeedChk(ierr); 7283bfcb0deSjeremylt ierr = CeedFree(&impl->outdata); CeedChk(ierr); 7293bfcb0deSjeremylt 73082b77998SStan Tomov ierr = CeedFree(&op->data); CeedChk(ierr); 73182b77998SStan Tomov return 0; 73282b77998SStan Tomov } 73382b77998SStan Tomov 7343bfcb0deSjeremylt /* 7353bfcb0deSjeremylt Setup infields or outfields 7363bfcb0deSjeremylt */ 7373bfcb0deSjeremylt static int CeedOperatorSetupFields_Magma(struct CeedQFunctionField qfields[16], 7383bfcb0deSjeremylt struct CeedOperatorField ofields[16], 7393bfcb0deSjeremylt CeedVector *evecs, CeedScalar **qdata, 7403bfcb0deSjeremylt CeedScalar **qdata_alloc, CeedScalar **indata, 7413bfcb0deSjeremylt CeedInt starti, CeedInt starte, 7423bfcb0deSjeremylt CeedInt startq, CeedInt numfields, CeedInt Q) { 7433bfcb0deSjeremylt CeedInt dim, ierr, ie=starte, iq=startq, ncomp; 74482b77998SStan Tomov 7453bfcb0deSjeremylt // Loop over fields 7463bfcb0deSjeremylt for (CeedInt i=0; i<numfields; i++) { 7473bfcb0deSjeremylt if (ofields[i].Erestrict) { 7483bfcb0deSjeremylt ierr = CeedElemRestrictionCreateVector(ofields[i].Erestrict, NULL, &evecs[ie]); 74982b77998SStan Tomov CeedChk(ierr); 7503bfcb0deSjeremylt ie++; 75182b77998SStan Tomov } 7523bfcb0deSjeremylt CeedEvalMode emode = qfields[i].emode; 7533bfcb0deSjeremylt switch(emode) { 7543bfcb0deSjeremylt case CEED_EVAL_NONE: 7553bfcb0deSjeremylt break; // No action 7563bfcb0deSjeremylt case CEED_EVAL_INTERP: 7573bfcb0deSjeremylt ncomp = qfields[i].ncomp; 7583bfcb0deSjeremylt ierr = CeedMalloc(Q*ncomp, &qdata_alloc[iq]); CeedChk(ierr); 7593bfcb0deSjeremylt qdata[i + starti] = qdata_alloc[iq]; 7603bfcb0deSjeremylt iq++; 7613bfcb0deSjeremylt break; 7623bfcb0deSjeremylt case CEED_EVAL_GRAD: 7633bfcb0deSjeremylt ncomp = qfields[i].ncomp; 7643bfcb0deSjeremylt dim = ofields[i].basis->dim; 7653bfcb0deSjeremylt ierr = CeedMalloc(Q*ncomp*dim, &qdata_alloc[iq]); CeedChk(ierr); 7663bfcb0deSjeremylt qdata[i + starti] = qdata_alloc[iq]; 7673bfcb0deSjeremylt iq++; 7683bfcb0deSjeremylt break; 7693bfcb0deSjeremylt case CEED_EVAL_WEIGHT: // Only on input fields 7703bfcb0deSjeremylt ierr = CeedMalloc(Q, &qdata_alloc[iq]); CeedChk(ierr); 7713bfcb0deSjeremylt ierr = CeedBasisApply(ofields[iq].basis, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, 7723bfcb0deSjeremylt NULL, qdata_alloc[iq]); CeedChk(ierr); 7733bfcb0deSjeremylt qdata[i] = qdata_alloc[iq]; 7743bfcb0deSjeremylt indata[i] = qdata[i]; 7753bfcb0deSjeremylt iq++; 7763bfcb0deSjeremylt break; 7773bfcb0deSjeremylt case CEED_EVAL_DIV: 7783bfcb0deSjeremylt break; // Not implimented 7793bfcb0deSjeremylt case CEED_EVAL_CURL: 7803bfcb0deSjeremylt break; // Not implimented 78182b77998SStan Tomov } 7823bfcb0deSjeremylt } 78382b77998SStan Tomov return 0; 78482b77998SStan Tomov } 78582b77998SStan Tomov 7863bfcb0deSjeremylt /* 7873bfcb0deSjeremylt CeedOperator needs to connect all the named fields (be they active or passive) 7883bfcb0deSjeremylt to the named inputs and outputs of its CeedQFunction. 7893bfcb0deSjeremylt */ 7903bfcb0deSjeremylt static int CeedOperatorSetup_Magma(CeedOperator op) { 7913bfcb0deSjeremylt if (op->setupdone) return 0; 7926a35ea15Sjeremylt CeedOperator_Magma *opmagma = op->data; 7933bfcb0deSjeremylt CeedQFunction qf = op->qf; 7943bfcb0deSjeremylt CeedInt Q = op->numqpoints; 79582b77998SStan Tomov int ierr; 79682b77998SStan Tomov 7973bfcb0deSjeremylt // Count infield and outfield array sizes and evectors 7983bfcb0deSjeremylt for (CeedInt i=0; i<qf->numinputfields; i++) { 7993bfcb0deSjeremylt CeedEvalMode emode = qf->inputfields[i].emode; 8006a35ea15Sjeremylt opmagma->numqin += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD) + 8013bfcb0deSjeremylt !! 8023bfcb0deSjeremylt (emode & CEED_EVAL_WEIGHT); 8036a35ea15Sjeremylt opmagma->numein += 8043bfcb0deSjeremylt !!op->inputfields[i].Erestrict; // Need E-vector when restriction exists 80582b77998SStan Tomov } 8063bfcb0deSjeremylt for (CeedInt i=0; i<qf->numoutputfields; i++) { 8073bfcb0deSjeremylt CeedEvalMode emode = qf->outputfields[i].emode; 8086a35ea15Sjeremylt opmagma->numqout += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD); 8096a35ea15Sjeremylt opmagma->numeout += !!op->outputfields[i].Erestrict; 8103bfcb0deSjeremylt } 8113bfcb0deSjeremylt 8123bfcb0deSjeremylt // Allocate 8136a35ea15Sjeremylt ierr = CeedCalloc(opmagma->numein + opmagma->numeout, &opmagma->evecs); 8143bfcb0deSjeremylt CeedChk(ierr); 8156a35ea15Sjeremylt ierr = CeedCalloc(qf->numinputfields + qf->numoutputfields, &opmagma->edata); 8163bfcb0deSjeremylt CeedChk(ierr); 8173bfcb0deSjeremylt 8186a35ea15Sjeremylt ierr = CeedCalloc(opmagma->numqin + opmagma->numqout, &opmagma->qdata_alloc); 8193bfcb0deSjeremylt CeedChk(ierr); 8206a35ea15Sjeremylt ierr = CeedCalloc(qf->numinputfields + qf->numoutputfields, &opmagma->qdata); 8213bfcb0deSjeremylt CeedChk(ierr); 8223bfcb0deSjeremylt 8236a35ea15Sjeremylt ierr = CeedCalloc(16, &opmagma->indata); CeedChk(ierr); 8246a35ea15Sjeremylt ierr = CeedCalloc(16, &opmagma->outdata); CeedChk(ierr); 8253bfcb0deSjeremylt 8263bfcb0deSjeremylt // Set up infield and outfield pointer arrays 8273bfcb0deSjeremylt // Infields 8283bfcb0deSjeremylt ierr = CeedOperatorSetupFields_Magma(qf->inputfields, op->inputfields, 8296a35ea15Sjeremylt opmagma->evecs, opmagma->qdata, opmagma->qdata_alloc, 8306a35ea15Sjeremylt opmagma->indata, 0, 0, 0, 8313bfcb0deSjeremylt qf->numinputfields, Q); CeedChk(ierr); 8323bfcb0deSjeremylt 8333bfcb0deSjeremylt // Outfields 8343bfcb0deSjeremylt ierr = CeedOperatorSetupFields_Magma(qf->outputfields, op->outputfields, 8356a35ea15Sjeremylt opmagma->evecs, opmagma->qdata, opmagma->qdata_alloc, 8366a35ea15Sjeremylt opmagma->indata, qf->numinputfields, opmagma->numein, 8376a35ea15Sjeremylt opmagma->numqin, qf->numoutputfields, Q); CeedChk(ierr); 8383bfcb0deSjeremylt 8393bfcb0deSjeremylt op->setupdone = 1; 8403bfcb0deSjeremylt 8413bfcb0deSjeremylt return 0; 8423bfcb0deSjeremylt } 8433bfcb0deSjeremylt 844*68e7ed8aSjeremylt static int CeedOperatorApply_Magma(CeedOperator op, CeedVector invec, 8453bfcb0deSjeremylt CeedVector outvec, CeedRequest *request) { 846*68e7ed8aSjeremylt CeedOperator_Magma *opmagma = op->data; 8473bfcb0deSjeremylt CeedInt Q = op->numqpoints, elemsize; 8483bfcb0deSjeremylt int ierr; 8493bfcb0deSjeremylt CeedQFunction qf = op->qf; 8503bfcb0deSjeremylt CeedTransposeMode lmode = CEED_NOTRANSPOSE; 8516a35ea15Sjeremylt CeedScalar *vec_temp; 8523bfcb0deSjeremylt 8533bfcb0deSjeremylt // Setup 854*68e7ed8aSjeremylt ierr = CeedOperatorSetup_Magma(op); CeedChk(ierr); 8553bfcb0deSjeremylt 8563bfcb0deSjeremylt // Input Evecs and Restriction 8573bfcb0deSjeremylt for (CeedInt i=0,iein=0; i<qf->numinputfields; i++) { 8583bfcb0deSjeremylt // Restriction 8593bfcb0deSjeremylt if (op->inputfields[i].Erestrict) { 8606a35ea15Sjeremylt // Zero evec 8610f5de9e9Sjeremylt ierr = CeedVectorGetArray(opmagma->evecs[iein], CEED_MEM_HOST, &vec_temp); 8620f5de9e9Sjeremylt CeedChk(ierr); 8636a35ea15Sjeremylt for (CeedInt j=0; j<opmagma->evecs[iein]->length; j++) 8646a35ea15Sjeremylt vec_temp[j] = 0.; 8656a35ea15Sjeremylt ierr = CeedVectorRestoreArray(opmagma->evecs[iein], &vec_temp); CeedChk(ierr); 8663bfcb0deSjeremylt // Passive 8673bfcb0deSjeremylt if (op->inputfields[i].vec) { 8686a35ea15Sjeremylt // Restrict 8693bfcb0deSjeremylt ierr = CeedElemRestrictionApply(op->inputfields[i].Erestrict, CEED_NOTRANSPOSE, 8706a35ea15Sjeremylt lmode, op->inputfields[i].vec, opmagma->evecs[iein], 8713bfcb0deSjeremylt request); CeedChk(ierr); 8726a35ea15Sjeremylt // Get evec 8736a35ea15Sjeremylt ierr = CeedVectorGetArrayRead(opmagma->evecs[iein], CEED_MEM_HOST, 8746a35ea15Sjeremylt (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr); 8753bfcb0deSjeremylt iein++; 8763bfcb0deSjeremylt } else { 8773bfcb0deSjeremylt // Active 8786a35ea15Sjeremylt // Restrict 8796a35ea15Sjeremylt 8803bfcb0deSjeremylt ierr = CeedElemRestrictionApply(op->inputfields[i].Erestrict, CEED_NOTRANSPOSE, 8816a35ea15Sjeremylt lmode, invec, opmagma->evecs[iein], request); CeedChk(ierr); 8826a35ea15Sjeremylt // Get evec 8836a35ea15Sjeremylt ierr = CeedVectorGetArrayRead(opmagma->evecs[iein], CEED_MEM_HOST, 8846a35ea15Sjeremylt (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr); 8853bfcb0deSjeremylt iein++; 8863bfcb0deSjeremylt } 8873bfcb0deSjeremylt } else { 8883bfcb0deSjeremylt // No restriction 8893bfcb0deSjeremylt CeedEvalMode emode = qf->inputfields[i].emode; 8903bfcb0deSjeremylt if (emode & CEED_EVAL_WEIGHT) { 8913bfcb0deSjeremylt } else { 8926a35ea15Sjeremylt // Passive 8936a35ea15Sjeremylt if (op->inputfields[i].vec) { 8943bfcb0deSjeremylt ierr = CeedVectorGetArrayRead(op->inputfields[i].vec, CEED_MEM_HOST, 8956a35ea15Sjeremylt (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr); 8966a35ea15Sjeremylt // Active 8976a35ea15Sjeremylt } else { 8986a35ea15Sjeremylt ierr = CeedVectorGetArrayRead(invec, CEED_MEM_HOST, 8996a35ea15Sjeremylt (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr); 9006a35ea15Sjeremylt } 9013bfcb0deSjeremylt } 9023bfcb0deSjeremylt } 9033bfcb0deSjeremylt } 9043bfcb0deSjeremylt 9053bfcb0deSjeremylt // Output Evecs 9066a35ea15Sjeremylt for (CeedInt i=0,ieout=opmagma->numein; i<qf->numoutputfields; i++) { 9073bfcb0deSjeremylt // Restriction 9083bfcb0deSjeremylt if (op->outputfields[i].Erestrict) { 9096a35ea15Sjeremylt ierr = CeedVectorGetArray(opmagma->evecs[ieout], CEED_MEM_HOST, 9106a35ea15Sjeremylt &opmagma->edata[i + qf->numinputfields]); CeedChk(ierr); 9113bfcb0deSjeremylt ieout++; 9123bfcb0deSjeremylt } else { 9133bfcb0deSjeremylt // No restriction 9143bfcb0deSjeremylt // Passive 9153bfcb0deSjeremylt if (op->inputfields[i].vec) { 9166a35ea15Sjeremylt ierr = CeedVectorGetArray(op->outputfields[i].vec, CEED_MEM_HOST, 9176a35ea15Sjeremylt &opmagma->edata[i + qf->numinputfields]); CeedChk(ierr); 9183bfcb0deSjeremylt } else { 9193bfcb0deSjeremylt // Active 9203bfcb0deSjeremylt ierr = CeedVectorGetArray(outvec, CEED_MEM_HOST, 9216a35ea15Sjeremylt &opmagma->edata[i + qf->numinputfields]); CeedChk(ierr); 9223bfcb0deSjeremylt } 9233bfcb0deSjeremylt } 9243bfcb0deSjeremylt } 9253bfcb0deSjeremylt 9263bfcb0deSjeremylt // Loop through elements 9273bfcb0deSjeremylt for (CeedInt e=0; e<op->numelements; e++) { 9283bfcb0deSjeremylt // Input basis apply if needed 9293bfcb0deSjeremylt for (CeedInt i=0; i<qf->numinputfields; i++) { 9303bfcb0deSjeremylt // Get elemsize 9313bfcb0deSjeremylt if (op->inputfields[i].Erestrict) { 9323bfcb0deSjeremylt elemsize = op->inputfields[i].Erestrict->elemsize; 9333bfcb0deSjeremylt } else { 9343bfcb0deSjeremylt elemsize = Q; 9353bfcb0deSjeremylt } 9363bfcb0deSjeremylt // Get emode, ncomp 9373bfcb0deSjeremylt CeedEvalMode emode = qf->inputfields[i].emode; 9383bfcb0deSjeremylt CeedInt ncomp = qf->inputfields[i].ncomp; 9393bfcb0deSjeremylt // Basis action 9403bfcb0deSjeremylt switch(emode) { 9413bfcb0deSjeremylt case CEED_EVAL_NONE: 9426a35ea15Sjeremylt opmagma->indata[i] = &opmagma->edata[i][e*Q*ncomp]; 9433bfcb0deSjeremylt break; 9443bfcb0deSjeremylt case CEED_EVAL_INTERP: 9453bfcb0deSjeremylt ierr = CeedBasisApply(op->inputfields[i].basis, CEED_NOTRANSPOSE, 9466a35ea15Sjeremylt CEED_EVAL_INTERP, &opmagma->edata[i][e*elemsize*ncomp], opmagma->qdata[i]); 9473bfcb0deSjeremylt CeedChk(ierr); 9486a35ea15Sjeremylt opmagma->indata[i] = opmagma->qdata[i]; 9493bfcb0deSjeremylt break; 9503bfcb0deSjeremylt case CEED_EVAL_GRAD: 9513bfcb0deSjeremylt ierr = CeedBasisApply(op->inputfields[i].basis, CEED_NOTRANSPOSE, 9526a35ea15Sjeremylt CEED_EVAL_GRAD, &opmagma->edata[i][e*elemsize*ncomp], opmagma->qdata[i]); 9533bfcb0deSjeremylt CeedChk(ierr); 9546a35ea15Sjeremylt opmagma->indata[i] = opmagma->qdata[i]; 9553bfcb0deSjeremylt break; 9563bfcb0deSjeremylt case CEED_EVAL_WEIGHT: 9573bfcb0deSjeremylt break; // No action 9583bfcb0deSjeremylt case CEED_EVAL_DIV: 9593bfcb0deSjeremylt break; // Not implimented 9603bfcb0deSjeremylt case CEED_EVAL_CURL: 9613bfcb0deSjeremylt break; // Not implimented 9623bfcb0deSjeremylt } 9633bfcb0deSjeremylt } 9643bfcb0deSjeremylt // Output pointers 9653bfcb0deSjeremylt for (CeedInt i=0; i<qf->numoutputfields; i++) { 9663bfcb0deSjeremylt CeedEvalMode emode = qf->outputfields[i].emode; 9673bfcb0deSjeremylt if (emode == CEED_EVAL_NONE) { 9683bfcb0deSjeremylt CeedInt ncomp = qf->outputfields[i].ncomp; 9696a35ea15Sjeremylt opmagma->outdata[i] = &opmagma->edata[i + qf->numinputfields][e*Q*ncomp]; 9703bfcb0deSjeremylt } 9713bfcb0deSjeremylt } 9723bfcb0deSjeremylt // Q function 9730f5de9e9Sjeremylt ierr = CeedQFunctionApply(op->qf, Q, 9740f5de9e9Sjeremylt (const CeedScalar * const*) opmagma->indata, 9756a35ea15Sjeremylt opmagma->outdata); CeedChk(ierr); 9763bfcb0deSjeremylt 9773bfcb0deSjeremylt // Output basis apply if needed 9783bfcb0deSjeremylt for (CeedInt i=0; i<qf->numoutputfields; i++) { 9793bfcb0deSjeremylt // Get elemsize 9803bfcb0deSjeremylt if (op->outputfields[i].Erestrict) { 9813bfcb0deSjeremylt elemsize = op->outputfields[i].Erestrict->elemsize; 9823bfcb0deSjeremylt } else { 9833bfcb0deSjeremylt elemsize = Q; 9843bfcb0deSjeremylt } 9853bfcb0deSjeremylt // Get emode, ncomp 9863bfcb0deSjeremylt CeedInt ncomp = qf->outputfields[i].ncomp; 9873bfcb0deSjeremylt CeedEvalMode emode = qf->outputfields[i].emode; 9883bfcb0deSjeremylt // Basis action 9893bfcb0deSjeremylt switch(emode) { 9903bfcb0deSjeremylt case CEED_EVAL_NONE: 9913bfcb0deSjeremylt break; // No action 9923bfcb0deSjeremylt case CEED_EVAL_INTERP: 9933bfcb0deSjeremylt ierr = CeedBasisApply(op->outputfields[i].basis, CEED_TRANSPOSE, 9946a35ea15Sjeremylt CEED_EVAL_INTERP, opmagma->outdata[i], 9956a35ea15Sjeremylt &opmagma->edata[i + qf->numinputfields][e*elemsize*ncomp]); CeedChk(ierr); 9963bfcb0deSjeremylt break; 9973bfcb0deSjeremylt case CEED_EVAL_GRAD: 9983bfcb0deSjeremylt ierr = CeedBasisApply(op->outputfields[i].basis, CEED_TRANSPOSE, CEED_EVAL_GRAD, 9996a35ea15Sjeremylt opmagma->outdata[i], &opmagma->edata[i + qf->numinputfields][e*elemsize*ncomp]); 10003bfcb0deSjeremylt CeedChk(ierr); 10013bfcb0deSjeremylt break; 10023bfcb0deSjeremylt case CEED_EVAL_WEIGHT: 10033bfcb0deSjeremylt break; // Should not occur 10043bfcb0deSjeremylt case CEED_EVAL_DIV: 10053bfcb0deSjeremylt break; // Not implimented 10063bfcb0deSjeremylt case CEED_EVAL_CURL: 10073bfcb0deSjeremylt break; // Not implimented 10083bfcb0deSjeremylt } 10093bfcb0deSjeremylt } 10103bfcb0deSjeremylt } 10113bfcb0deSjeremylt 10123bfcb0deSjeremylt // Output restriction 10136a35ea15Sjeremylt for (CeedInt i=0,ieout=opmagma->numein; i<qf->numoutputfields; i++) { 10143bfcb0deSjeremylt // Restriction 10153bfcb0deSjeremylt if (op->outputfields[i].Erestrict) { 10163bfcb0deSjeremylt // Passive 10173bfcb0deSjeremylt if (op->outputfields[i].vec) { 10186a35ea15Sjeremylt // Restore evec 10196a35ea15Sjeremylt ierr = CeedVectorRestoreArray(opmagma->evecs[ieout], 10206a35ea15Sjeremylt &opmagma->edata[i + qf->numinputfields]); CeedChk(ierr); 10216a35ea15Sjeremylt // Zero lvec 10220f5de9e9Sjeremylt ierr = CeedVectorGetArray(op->outputfields[i].vec, CEED_MEM_HOST, &vec_temp); 10230f5de9e9Sjeremylt CeedChk(ierr); 10246a35ea15Sjeremylt for (CeedInt j=0; j<op->outputfields[i].vec->length; j++) 10256a35ea15Sjeremylt vec_temp[j] = 0.; 10260f5de9e9Sjeremylt ierr = CeedVectorRestoreArray(op->outputfields[i].vec, &vec_temp); 10270f5de9e9Sjeremylt CeedChk(ierr); 10286a35ea15Sjeremylt // Restrict 10293bfcb0deSjeremylt ierr = CeedElemRestrictionApply(op->outputfields[i].Erestrict, CEED_TRANSPOSE, 10306a35ea15Sjeremylt lmode, opmagma->evecs[ieout], op->outputfields[i].vec, request); CeedChk(ierr); 10313bfcb0deSjeremylt ieout++; 10323bfcb0deSjeremylt } else { 10333bfcb0deSjeremylt // Active 10346a35ea15Sjeremylt // Restore evec 10356a35ea15Sjeremylt ierr = CeedVectorRestoreArray(opmagma->evecs[ieout], 10366a35ea15Sjeremylt &opmagma->edata[i + qf->numinputfields]); CeedChk(ierr); 10376a35ea15Sjeremylt // Zero lvec 10386a35ea15Sjeremylt ierr = CeedVectorGetArray(outvec, CEED_MEM_HOST, &vec_temp); CeedChk(ierr); 10396a35ea15Sjeremylt for (CeedInt j=0; j<outvec->length; j++) 10406a35ea15Sjeremylt vec_temp[j] = 0.; 10416a35ea15Sjeremylt ierr = CeedVectorRestoreArray(outvec, &vec_temp); CeedChk(ierr); 10426a35ea15Sjeremylt // Restrict 10433bfcb0deSjeremylt ierr = CeedElemRestrictionApply(op->outputfields[i].Erestrict, CEED_TRANSPOSE, 10446a35ea15Sjeremylt lmode, opmagma->evecs[ieout], outvec, request); CeedChk(ierr); 10453bfcb0deSjeremylt ieout++; 10463bfcb0deSjeremylt } 10473bfcb0deSjeremylt } else { 10483bfcb0deSjeremylt // No Restriction 10493bfcb0deSjeremylt // Passive 10503bfcb0deSjeremylt if (op->outputfields[i].vec) { 10513bfcb0deSjeremylt ierr = CeedVectorRestoreArray(op->outputfields[i].vec, 10526a35ea15Sjeremylt &opmagma->edata[i + qf->numinputfields]); CeedChk(ierr); 10533bfcb0deSjeremylt } else { 10543bfcb0deSjeremylt // Active 10556a35ea15Sjeremylt ierr = CeedVectorRestoreArray(outvec, &opmagma->edata[i + qf->numinputfields]); 10563bfcb0deSjeremylt CeedChk(ierr); 10573bfcb0deSjeremylt } 10583bfcb0deSjeremylt } 10593bfcb0deSjeremylt } 10603bfcb0deSjeremylt 10616a35ea15Sjeremylt // Restore input arrays 10626a35ea15Sjeremylt for (CeedInt i=0,iein=0; i<qf->numinputfields; i++) { 10636a35ea15Sjeremylt // Restriction 10646a35ea15Sjeremylt if (op->inputfields[i].Erestrict) { 10656a35ea15Sjeremylt ierr = CeedVectorRestoreArrayRead(opmagma->evecs[iein], 10666a35ea15Sjeremylt (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr); 10676a35ea15Sjeremylt iein++; 10686a35ea15Sjeremylt } else { 10696a35ea15Sjeremylt // No restriction 10706a35ea15Sjeremylt CeedEvalMode emode = qf->inputfields[i].emode; 10716a35ea15Sjeremylt if (emode & CEED_EVAL_WEIGHT) { 10726a35ea15Sjeremylt } else { 10736a35ea15Sjeremylt // Passive 10746a35ea15Sjeremylt if (op->inputfields[i].vec) { 10756a35ea15Sjeremylt ierr = CeedVectorRestoreArrayRead(op->inputfields[i].vec, 10766a35ea15Sjeremylt (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr); 10776a35ea15Sjeremylt // Active 10786a35ea15Sjeremylt } else { 10796a35ea15Sjeremylt ierr = CeedVectorRestoreArrayRead(invec, 10806a35ea15Sjeremylt (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr); 10816a35ea15Sjeremylt 10826a35ea15Sjeremylt } 10836a35ea15Sjeremylt } 10846a35ea15Sjeremylt } 10856a35ea15Sjeremylt } 10866a35ea15Sjeremylt 108782b77998SStan Tomov return 0; 108882b77998SStan Tomov } 108982b77998SStan Tomov 109082b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) { 109182b77998SStan Tomov CeedOperator_Magma *impl; 109282b77998SStan Tomov int ierr; 109382b77998SStan Tomov 109482b77998SStan Tomov ierr = CeedCalloc(1, &impl); CeedChk(ierr); 109582b77998SStan Tomov op->data = impl; 109682b77998SStan Tomov op->Destroy = CeedOperatorDestroy_Magma; 109782b77998SStan Tomov op->Apply = CeedOperatorApply_Magma; 109882b77998SStan Tomov return 0; 109982b77998SStan Tomov } 110082b77998SStan Tomov 110182b77998SStan Tomov // ***************************************************************************** 110282b77998SStan Tomov // * INIT 110382b77998SStan Tomov // ***************************************************************************** 110482b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) { 11051dc2661bSVeselin Dobrev int ierr; 11062a847359SStan Tomov if (strcmp(resource, "/gpu/magma")) 110782b77998SStan Tomov return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 110893fbe329SStan Tomov 11091dc2661bSVeselin Dobrev ierr = magma_init(); 11101dc2661bSVeselin Dobrev if (ierr) return CeedError(ceed, 1, "error in magma_init(): %d\n", ierr); 111193fbe329SStan Tomov //magma_print_environment(); 111293fbe329SStan Tomov 111382b77998SStan Tomov ceed->VecCreate = CeedVectorCreate_Magma; 111482b77998SStan Tomov ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 111582b77998SStan Tomov ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 111682b77998SStan Tomov ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 111782b77998SStan Tomov ceed->OperatorCreate = CeedOperatorCreate_Magma; 111882b77998SStan Tomov return 0; 111982b77998SStan Tomov } 112082b77998SStan Tomov 112182b77998SStan Tomov // ***************************************************************************** 112282b77998SStan Tomov // * REGISTER 112382b77998SStan Tomov // ***************************************************************************** 112482b77998SStan Tomov __attribute__((constructor)) 112582b77998SStan Tomov static void Register(void) { 112644951fc6Sjeremylt CeedRegister("/gpu/magma", CeedInit_Magma, 20); 112782b77998SStan Tomov } 1128