182b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 282b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 382b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details. 482b77998SStan Tomov // 582b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software 682b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral 782b77998SStan Tomov // element discretizations for exascale applications. For more information and 882b77998SStan Tomov // source code availability see http://github.com/ceed. 982b77998SStan Tomov // 1082b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1182b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office 1282b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for 1382b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including 1482b77998SStan Tomov // software, applications, hardware, advanced system engineering and early 1582b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative. 1682b77998SStan Tomov 1738612b08SStan Tomov #include "ceed-magma.h" 185a9ca9adSVeselin Dobrev #include <string.h> 1982b77998SStan Tomov 2082b77998SStan Tomov typedef struct { 2182b77998SStan Tomov CeedScalar *array; 2293fbe329SStan Tomov CeedScalar *darray; 23e2900954SStan Tomov int own_; 24c119dad6SStan Tomov int down_; 2582b77998SStan Tomov } CeedVector_Magma; 2682b77998SStan Tomov 2782b77998SStan Tomov typedef struct { 281751b0a9SStan Tomov CeedInt *indices; 291751b0a9SStan Tomov CeedInt *dindices; 301751b0a9SStan Tomov int own_; 31c119dad6SStan Tomov int down_; // cover a case where we own Device memory 3282b77998SStan Tomov } CeedElemRestriction_Magma; 3382b77998SStan Tomov 3482b77998SStan Tomov typedef struct { 3582b77998SStan Tomov CeedVector etmp; 3682b77998SStan Tomov CeedVector qdata; 3782b77998SStan Tomov } CeedOperator_Magma; 3882b77998SStan Tomov 3993fbe329SStan Tomov // ***************************************************************************** 4093fbe329SStan Tomov // * Initialize vector vec (after free mem) with values from array based on cmode 4193fbe329SStan Tomov // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 4293fbe329SStan Tomov // * to array, and data is copied (not store passed pointer) 4393fbe329SStan Tomov // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 4493fbe329SStan Tomov // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 4593fbe329SStan Tomov // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 4693fbe329SStan Tomov // ***************************************************************************** 4782b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 4882b77998SStan Tomov CeedCopyMode cmode, CeedScalar *array) { 4982b77998SStan Tomov CeedVector_Magma *impl = vec->data; 5082b77998SStan Tomov int ierr; 5182b77998SStan Tomov 52c119dad6SStan Tomov // If own data, free the "old" data, e.g., as it may be of different size 53e2900954SStan Tomov if (impl->own_) { 54e2900954SStan Tomov magma_free( impl->darray ); 55e2900954SStan Tomov magma_free_pinned( impl->array ); 56e2900954SStan Tomov impl->darray = NULL; 57e2900954SStan Tomov impl->array = NULL; 58e2900954SStan Tomov impl->own_ = 0; 59c119dad6SStan Tomov impl->down_= 0; 60e2900954SStan Tomov } 6193fbe329SStan Tomov 62e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 63e2900954SStan Tomov // memory is on the host; own_ = 0 6482b77998SStan Tomov switch (cmode) { 6582b77998SStan Tomov case CEED_COPY_VALUES: 6693fbe329SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 67e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 68e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 69e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 70e2900954SStan Tomov impl->own_ = 1; 71e2900954SStan Tomov 72c119dad6SStan Tomov if (array != NULL) 7393fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 7493fbe329SStan Tomov array, 1, impl->darray, 1); 7582b77998SStan Tomov break; 7682b77998SStan Tomov case CEED_OWN_POINTER: 7793fbe329SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 78e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 791751b0a9SStan Tomov // TODO: possible problem here is if we are passed non-pinned memory; 801751b0a9SStan Tomov // (as we own it, lter in destroy, we use free for pinned memory). 81e2900954SStan Tomov impl->array = array; 82e2900954SStan Tomov impl->own_ = 1; 83e2900954SStan Tomov 84c119dad6SStan Tomov if (array != NULL) 8593fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 8693fbe329SStan Tomov array, 1, impl->darray, 1); 8782b77998SStan Tomov break; 8882b77998SStan Tomov case CEED_USE_POINTER: 89c119dad6SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 90c119dad6SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 91c119dad6SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 92c119dad6SStan Tomov array, 1, impl->darray, 1); 9306b636dbSStan Tomov 9406b636dbSStan Tomov impl->down_ = 1; 9582b77998SStan Tomov impl->array = array; 9682b77998SStan Tomov } 97e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 98e2900954SStan Tomov // memory is on the device; own = 0 99e2900954SStan Tomov switch (cmode) { 100e2900954SStan Tomov case CEED_COPY_VALUES: 101e2900954SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 102e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 103e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 104e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 105e2900954SStan Tomov impl->own_ = 1; 106e2900954SStan Tomov 107e2900954SStan Tomov if (array) 108e2900954SStan Tomov magma_copyvector(vec->length, sizeof(array[0]), 109e2900954SStan Tomov array, 1, impl->darray, 1); 11006b636dbSStan Tomov else 11106b636dbSStan Tomov // t30 assumes allocation initializes with 0s 11206b636dbSStan Tomov magma_setvector(vec->length, sizeof(array[0]), 11306b636dbSStan Tomov impl->array, 1, impl->darray, 1); 114e2900954SStan Tomov break; 115e2900954SStan Tomov case CEED_OWN_POINTER: 116e2900954SStan Tomov impl->darray = array; 117e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 118e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 119e2900954SStan Tomov impl->own_ = 1; 120e2900954SStan Tomov 121e2900954SStan Tomov break; 122e2900954SStan Tomov case CEED_USE_POINTER: 123e2900954SStan Tomov impl->darray = array; 124e2900954SStan Tomov impl->array = NULL; 125e2900954SStan Tomov } 126e2900954SStan Tomov 127e2900954SStan Tomov } else 128e2900954SStan Tomov return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 129e2900954SStan Tomov 13082b77998SStan Tomov return 0; 13182b77998SStan Tomov } 13282b77998SStan Tomov 13393fbe329SStan Tomov // ***************************************************************************** 134e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) 13593fbe329SStan Tomov // ***************************************************************************** 13682b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 13782b77998SStan Tomov CeedScalar **array) { 13882b77998SStan Tomov CeedVector_Magma *impl = vec->data; 13982b77998SStan Tomov int ierr; 14082b77998SStan Tomov 141e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 142c119dad6SStan Tomov if (impl->own_) { 143e2900954SStan Tomov // data is owned so GPU had the most up-to-date version; copy it 14406b636dbSStan Tomov // TTT - apparantly it doesn't have most up to date data 145e2900954SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 146e2900954SStan Tomov impl->darray, 1, impl->array, 1); 14706b636dbSStan Tomov CeedDebug("\033[31m[CeedVectorGetArray_Magma]"); 14806b636dbSStan Tomov //fprintf(stderr,"rrrrrrrrrrrrrrr\n"); 149c119dad6SStan Tomov } else if (impl->array == NULL) { 150c119dad6SStan Tomov // Vector doesn't own the data and was set on GPU 151c119dad6SStan Tomov if (impl->darray == NULL) { 152c119dad6SStan Tomov // call was made just to allocate memory 153c119dad6SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 154c119dad6SStan Tomov CeedChk(ierr); 155c119dad6SStan Tomov } else 156c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 15782b77998SStan Tomov } 15882b77998SStan Tomov *array = impl->array; 159e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 160c119dad6SStan Tomov if (impl->darray == NULL) { 161c119dad6SStan Tomov // Vector doesn't own the data and was set on the CPU 162c119dad6SStan Tomov if (impl->array == NULL) { 163c119dad6SStan Tomov // call was made just to allocate memory 164e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 165e2900954SStan Tomov CeedChk(ierr); 166c119dad6SStan Tomov } else 167c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 168e2900954SStan Tomov } 169e2900954SStan Tomov *array = impl->darray; 170e2900954SStan Tomov } else 171e2900954SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 172e2900954SStan Tomov 17382b77998SStan Tomov return 0; 17482b77998SStan Tomov } 17582b77998SStan Tomov 17693fbe329SStan Tomov // ***************************************************************************** 177e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 17893fbe329SStan Tomov // ***************************************************************************** 17982b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 18082b77998SStan Tomov const CeedScalar **array) { 18182b77998SStan Tomov CeedVector_Magma *impl = vec->data; 18282b77998SStan Tomov int ierr; 18382b77998SStan Tomov 184e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 185c119dad6SStan Tomov if (impl->own_) { 186e2900954SStan Tomov // data is owned so GPU had the most up-to-date version; copy it 187e2900954SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 188e2900954SStan Tomov impl->darray, 1, impl->array, 1); 189c119dad6SStan Tomov } else if (impl->array == NULL) { 190c119dad6SStan Tomov // Vector doesn't own the data and was set on GPU 191c119dad6SStan Tomov if (impl->darray == NULL) { 192c119dad6SStan Tomov // call was made just to allocate memory 193c119dad6SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 194c119dad6SStan Tomov CeedChk(ierr); 195c119dad6SStan Tomov } else 196c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 19782b77998SStan Tomov } 19882b77998SStan Tomov *array = impl->array; 199e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 200c119dad6SStan Tomov if (impl->darray == NULL) { 201c119dad6SStan Tomov // Vector doesn't own the data and was set on the CPU 202c119dad6SStan Tomov if (impl->array == NULL) { 203c119dad6SStan Tomov // call was made just to allocate memory 204e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 205e2900954SStan Tomov CeedChk(ierr); 206c119dad6SStan Tomov } else 207c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 208e2900954SStan Tomov } 209e2900954SStan Tomov *array = impl->darray; 210e2900954SStan Tomov } else 211e2900954SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 212e2900954SStan Tomov 21382b77998SStan Tomov return 0; 21482b77998SStan Tomov } 21582b77998SStan Tomov 21693fbe329SStan Tomov // ***************************************************************************** 217e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST 218e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if 219e2900954SStan Tomov // * it was GPU memory we would not call this routine at all. 22093fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 22193fbe329SStan Tomov // * from vec and possibly modified them. 22293fbe329SStan Tomov // ***************************************************************************** 22382b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 22493fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 22593fbe329SStan Tomov 226c119dad6SStan Tomov // Check if the array is a CPU pointer 227c119dad6SStan Tomov if (*array == impl->array) { 228c119dad6SStan Tomov // Update device, if the device pointer is not NULL 229c119dad6SStan Tomov if (impl->darray != NULL) { 23093fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 23193fbe329SStan Tomov *array, 1, impl->darray, 1); 2325a9ca9adSVeselin Dobrev } else { 2335a9ca9adSVeselin Dobrev // nothing to do (case of CPU use pointer) 2345a9ca9adSVeselin Dobrev } 235c119dad6SStan Tomov 236c119dad6SStan Tomov } else if (impl->down_) { 237c119dad6SStan Tomov // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 238c119dad6SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 239c119dad6SStan Tomov impl->darray, 1, impl->array, 1); 240c119dad6SStan Tomov } 241e2900954SStan Tomov 24282b77998SStan Tomov *array = NULL; 24382b77998SStan Tomov return 0; 24482b77998SStan Tomov } 24582b77998SStan Tomov 24693fbe329SStan Tomov // ***************************************************************************** 247e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST 248e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if 249e2900954SStan Tomov // * it was GPU memory we would not call this routine at all. 25093fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 25193fbe329SStan Tomov // * from vec to only read them; in this case vec may have been modified meanwhile 25293fbe329SStan Tomov // * and needs to be restored here. 25393fbe329SStan Tomov // ***************************************************************************** 25482b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 25582b77998SStan Tomov const CeedScalar **array) { 25693fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 25793fbe329SStan Tomov 258c119dad6SStan Tomov // Check if the array is a CPU pointer 259c119dad6SStan Tomov if (*array == impl->array) { 260c119dad6SStan Tomov // Update device, if the device pointer is not NULL 2615a9ca9adSVeselin Dobrev if (impl->darray != NULL) { 26293fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 26393fbe329SStan Tomov *array, 1, impl->darray, 1); 2645a9ca9adSVeselin Dobrev } else { 2655a9ca9adSVeselin Dobrev // nothing to do (case of CPU use pointer) 2665a9ca9adSVeselin Dobrev } 267c119dad6SStan Tomov 268c119dad6SStan Tomov } else if (impl->down_) { 269c119dad6SStan Tomov // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 270c119dad6SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 271c119dad6SStan Tomov impl->darray, 1, impl->array, 1); 272c119dad6SStan Tomov } 273e2900954SStan Tomov 27482b77998SStan Tomov *array = NULL; 27582b77998SStan Tomov return 0; 27682b77998SStan Tomov } 27782b77998SStan Tomov 27882b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) { 27982b77998SStan Tomov CeedVector_Magma *impl = vec->data; 28082b77998SStan Tomov int ierr; 28182b77998SStan Tomov 282e2900954SStan Tomov // Free if we own the data 283e2900954SStan Tomov if (impl->own_) { 284e2900954SStan Tomov ierr = magma_free_pinned(impl->array); CeedChk(ierr); 28593fbe329SStan Tomov ierr = magma_free(impl->darray); CeedChk(ierr); 286c119dad6SStan Tomov } else if (impl->down_) { 287c119dad6SStan Tomov ierr = magma_free(impl->darray); CeedChk(ierr); 288e2900954SStan Tomov } 28982b77998SStan Tomov ierr = CeedFree(&vec->data); CeedChk(ierr); 29082b77998SStan Tomov return 0; 29182b77998SStan Tomov } 29282b77998SStan Tomov 29393fbe329SStan Tomov // ***************************************************************************** 29493fbe329SStan Tomov // * Create vector vec of size n 29593fbe329SStan Tomov // ***************************************************************************** 29682b77998SStan Tomov static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) { 29782b77998SStan Tomov CeedVector_Magma *impl; 29882b77998SStan Tomov int ierr; 29982b77998SStan Tomov 30082b77998SStan Tomov vec->SetArray = CeedVectorSetArray_Magma; 30182b77998SStan Tomov vec->GetArray = CeedVectorGetArray_Magma; 30282b77998SStan Tomov vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 30382b77998SStan Tomov vec->RestoreArray = CeedVectorRestoreArray_Magma; 30482b77998SStan Tomov vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 30582b77998SStan Tomov vec->Destroy = CeedVectorDestroy_Magma; 30682b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 30793fbe329SStan Tomov impl->darray = NULL; 308e2900954SStan Tomov impl->array = NULL; 309e2900954SStan Tomov impl->own_ = 0; 310c119dad6SStan Tomov impl->down_= 0; 31182b77998SStan Tomov vec->data = impl; 31282b77998SStan Tomov return 0; 31382b77998SStan Tomov } 31482b77998SStan Tomov 31593fbe329SStan Tomov // ***************************************************************************** 31693fbe329SStan Tomov // * Apply restriction operator r to u: v = r(rmode) u 31793fbe329SStan Tomov // ***************************************************************************** 31882b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 31982b77998SStan Tomov CeedTransposeMode tmode, CeedInt ncomp, 32082b77998SStan Tomov CeedTransposeMode lmode, CeedVector u, 32182b77998SStan Tomov CeedVector v, CeedRequest *request) { 32282b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 32382b77998SStan Tomov int ierr; 32482b77998SStan Tomov const CeedScalar *uu; 32582b77998SStan Tomov CeedScalar *vv; 326c119dad6SStan Tomov CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof; 327c119dad6SStan Tomov CeedInt esize = nelem * elemsize; 32806b636dbSStan Tomov 32906b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 330137a0714SStan Tomov CeedInt *dindices = impl->dindices; 331c119dad6SStan Tomov // Get pointers on the device 332c119dad6SStan Tomov ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr); 333c119dad6SStan Tomov ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr); 33406b636dbSStan Tomov #else 33506b636dbSStan Tomov CeedInt *indices = impl->indices; 33606b636dbSStan Tomov ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 33706b636dbSStan Tomov ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 33806b636dbSStan Tomov #endif 339c119dad6SStan Tomov 34082b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 34182b77998SStan Tomov // Perform: v = r * u 34282b77998SStan Tomov if (ncomp == 1) { 34306b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 344c119dad6SStan Tomov magma_template<<i=0:esize>> 345c119dad6SStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 346c119dad6SStan Tomov vv[i] = uu[dindices[i]]; 347e2900954SStan Tomov } 34806b636dbSStan Tomov #else 34906b636dbSStan Tomov for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]]; 35006b636dbSStan Tomov #endif 35182b77998SStan Tomov } else { 35282b77998SStan Tomov // vv is (elemsize x ncomp x nelem), column-major 35382b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 35406b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 35506b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 35606b636dbSStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) { 35706b636dbSStan Tomov vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d]; 35806b636dbSStan Tomov } 35906b636dbSStan Tomov #else 360c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 36182b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 362c119dad6SStan Tomov for (CeedInt i=0; i < elemsize; i++) { 363c119dad6SStan Tomov vv[i + elemsize*(d+ncomp*e)] = 364c119dad6SStan Tomov uu[indices[i+elemsize*e]+ndof*d]; 365c119dad6SStan Tomov } 36606b636dbSStan Tomov #endif 36782b77998SStan Tomov } else { // u is (ncomp x ndof), column-major 36806b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 36906b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 37006b636dbSStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 37106b636dbSStan Tomov vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]]; 37206b636dbSStan Tomov } 37306b636dbSStan Tomov #else 374c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 37582b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 376c119dad6SStan Tomov for (CeedInt i=0; i< elemsize; i++) { 377c119dad6SStan Tomov vv[i + elemsize*(d+ncomp*e)] = 378c119dad6SStan Tomov uu[d+ncomp*indices[i+elemsize*e]]; 379c119dad6SStan Tomov } 38006b636dbSStan Tomov #endif 38182b77998SStan Tomov } 38282b77998SStan Tomov } 38382b77998SStan Tomov } else { 38482b77998SStan Tomov // Note: in transpose mode, we perform: v += r^t * u 38582b77998SStan Tomov if (ncomp == 1) { 386c119dad6SStan Tomov // fprintf(stderr,"3 ---------\n"); 38706b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 388c119dad6SStan Tomov magma_template<<i=0:esize>> 389c119dad6SStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 390c119dad6SStan Tomov magmablas_datomic_add( &vv[dindices[i]], uu[i]); 391c119dad6SStan Tomov } 39206b636dbSStan Tomov #else 39306b636dbSStan Tomov for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i]; 39406b636dbSStan Tomov #endif 395c119dad6SStan Tomov } else { // u is (elemsize x ncomp x nelem) 396c119dad6SStan Tomov fprintf(stderr,"2 ---------\n"); 397c119dad6SStan Tomov 39882b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 39906b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 40006b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 40106b636dbSStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) { 40206b636dbSStan Tomov magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d], 40306b636dbSStan Tomov uu[i+iend*(d+e*dend)]); 40406b636dbSStan Tomov } 40506b636dbSStan Tomov #else 406c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 40782b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 408c119dad6SStan Tomov for (CeedInt i=0; i < elemsize; i++) { 409c119dad6SStan Tomov vv[indices[i + elemsize*e]+ndof*d] += 410c119dad6SStan Tomov uu[i + elemsize*(d+e*ncomp)]; 411c119dad6SStan Tomov } 41206b636dbSStan Tomov #endif 41306b636dbSStan Tomov } else { // vv is (ncomp x ndof), column-major 41406b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2 415c119dad6SStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 41606b636dbSStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 41706b636dbSStan Tomov magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]], 418c119dad6SStan Tomov uu[i+iend*(d+e*dend)]); 41982b77998SStan Tomov } 42006b636dbSStan Tomov #else 421c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 42282b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 423c119dad6SStan Tomov for (CeedInt i=0; i < elemsize; i++) { 424c119dad6SStan Tomov vv[d+ncomp*indices[i + elemsize*e]] += 425c119dad6SStan Tomov uu[i + elemsize*(d+e*ncomp)]; 426c119dad6SStan Tomov } 42706b636dbSStan Tomov #endif 42882b77998SStan Tomov } 42982b77998SStan Tomov } 43082b77998SStan Tomov } 43106b636dbSStan Tomov 43282b77998SStan Tomov ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 43382b77998SStan Tomov ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 434c119dad6SStan Tomov 43582b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 43682b77998SStan Tomov *request = NULL; 43782b77998SStan Tomov return 0; 43882b77998SStan Tomov } 43982b77998SStan Tomov 44082b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 44182b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 44282b77998SStan Tomov int ierr; 44382b77998SStan Tomov 4441751b0a9SStan Tomov // Free if we own the data 4451751b0a9SStan Tomov if (impl->own_) { 4461751b0a9SStan Tomov ierr = magma_free_pinned(impl->indices); CeedChk(ierr); 4471751b0a9SStan Tomov ierr = magma_free(impl->dindices); CeedChk(ierr); 4481981b6e4STzanio } else if (impl->down_) { 449c119dad6SStan Tomov ierr = magma_free(impl->dindices); CeedChk(ierr); 450c119dad6SStan Tomov } 45182b77998SStan Tomov ierr = CeedFree(&r->data); CeedChk(ierr); 45282b77998SStan Tomov return 0; 45382b77998SStan Tomov } 45482b77998SStan Tomov 45582b77998SStan Tomov static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r, 45682b77998SStan Tomov CeedMemType mtype, 45793fbe329SStan Tomov CeedCopyMode cmode, 45893fbe329SStan Tomov const CeedInt *indices) { 4591751b0a9SStan Tomov int ierr, size = r->nelem*r->elemsize; 46082b77998SStan Tomov CeedElemRestriction_Magma *impl; 46182b77998SStan Tomov 4621751b0a9SStan Tomov // Allocate memory for the MAGMA Restricton and initializa pointers to NULL 46382b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 4641751b0a9SStan Tomov impl->dindices = NULL; 4651751b0a9SStan Tomov impl->indices = NULL; 4661751b0a9SStan Tomov impl->own_ = 0; 467c119dad6SStan Tomov impl->down_= 0; 4681751b0a9SStan Tomov 4691751b0a9SStan Tomov if (mtype == CEED_MEM_HOST) { 4701751b0a9SStan Tomov // memory is on the host; own_ = 0 47182b77998SStan Tomov switch (cmode) { 47282b77998SStan Tomov case CEED_COPY_VALUES: 4731751b0a9SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 4741751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 4751751b0a9SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->indices, 4761751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 4771751b0a9SStan Tomov impl->own_ = 1; 4781751b0a9SStan Tomov 47906b636dbSStan Tomov if (indices != NULL) { 48006b636dbSStan Tomov memcpy(impl->indices, indices, size * sizeof(indices[0])); 4811751b0a9SStan Tomov magma_setvector(size, sizeof(CeedInt), 48206b636dbSStan Tomov impl->indices, 1, impl->dindices, 1); 48306b636dbSStan Tomov } 48482b77998SStan Tomov break; 48582b77998SStan Tomov case CEED_OWN_POINTER: 4861751b0a9SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 4871751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 4881751b0a9SStan Tomov // TODO: possible problem here is if we are passed non-pinned memory; 4891751b0a9SStan Tomov // (as we own it, lter in destroy, we use free for pinned memory). 490137a0714SStan Tomov impl->indices = (CeedInt *)indices; 4911751b0a9SStan Tomov impl->own_ = 1; 4921751b0a9SStan Tomov 493c119dad6SStan Tomov if (indices != NULL) 4941751b0a9SStan Tomov magma_setvector(size, sizeof(CeedInt), 4951751b0a9SStan Tomov indices, 1, impl->dindices, 1); 49682b77998SStan Tomov break; 49782b77998SStan Tomov case CEED_USE_POINTER: 498c119dad6SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 499c119dad6SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 500c119dad6SStan Tomov magma_setvector(size, sizeof(CeedInt), 501c119dad6SStan Tomov indices, 1, impl->dindices, 1); 502c119dad6SStan Tomov impl->down_ = 1; 503137a0714SStan Tomov impl->indices = (CeedInt *)indices; 50482b77998SStan Tomov } 5051751b0a9SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 5061751b0a9SStan Tomov // memory is on the device; own = 0 5071751b0a9SStan Tomov switch (cmode) { 5081751b0a9SStan Tomov case CEED_COPY_VALUES: 5091751b0a9SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 5101751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 5111751b0a9SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->indices, 5121751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 5131751b0a9SStan Tomov impl->own_ = 1; 5141751b0a9SStan Tomov 5151751b0a9SStan Tomov if (indices) 5161751b0a9SStan Tomov magma_copyvector(size, sizeof(CeedInt), 5171751b0a9SStan Tomov indices, 1, impl->dindices, 1); 5181751b0a9SStan Tomov break; 5191751b0a9SStan Tomov case CEED_OWN_POINTER: 520137a0714SStan Tomov impl->dindices = (CeedInt *)indices; 5211751b0a9SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->indices, 5221751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 5231751b0a9SStan Tomov impl->own_ = 1; 5241751b0a9SStan Tomov 5251751b0a9SStan Tomov break; 5261751b0a9SStan Tomov case CEED_USE_POINTER: 527137a0714SStan Tomov impl->dindices = (CeedInt *)indices; 5281751b0a9SStan Tomov impl->indices = NULL; 5291751b0a9SStan Tomov } 5301751b0a9SStan Tomov 5311751b0a9SStan Tomov } else 5321751b0a9SStan Tomov return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported"); 5331751b0a9SStan Tomov 53482b77998SStan Tomov r->data = impl; 53582b77998SStan Tomov r->Apply = CeedElemRestrictionApply_Magma; 53682b77998SStan Tomov r->Destroy = CeedElemRestrictionDestroy_Magma; 5371751b0a9SStan Tomov 53882b77998SStan Tomov return 0; 53982b77998SStan Tomov } 54082b77998SStan Tomov 54182b77998SStan Tomov // Contracts on the middle index 54282b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc 54382b77998SStan Tomov // TRANSPOSE: V_ajc = T_bj U_abc 54482b77998SStan Tomov // If Add != 0, "=" is replaced by "+=" 54582b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed, 54682b77998SStan Tomov CeedInt A, CeedInt B, CeedInt C, CeedInt J, 54782b77998SStan Tomov const CeedScalar *t, CeedTransposeMode tmode, 54882b77998SStan Tomov const CeedInt Add, 54982b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 55038612b08SStan Tomov #ifdef USE_MAGMA_BATCH 55138612b08SStan Tomov magma_dtensor_contract(ceed, A, B, C, J, t, tmode, Add, u, v); 55238612b08SStan Tomov #else 55382b77998SStan Tomov CeedInt tstride0 = B, tstride1 = 1; 55482b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 55582b77998SStan Tomov tstride0 = 1; tstride1 = J; 55682b77998SStan Tomov } 55738612b08SStan Tomov CeedDebug("\033[31m[CeedTensorContract] A=%d, J=%d, C=%d, B=%d: %d %d %d", 55838612b08SStan Tomov A,J,C,B,A*J*B*C, C*J*A, C*B*A); 55982b77998SStan Tomov for (CeedInt a=0; a<A; a++) { 56082b77998SStan Tomov for (CeedInt j=0; j<J; j++) { 56182b77998SStan Tomov if (!Add) { 56282b77998SStan Tomov for (CeedInt c=0; c<C; c++) 56382b77998SStan Tomov v[(a*J+j)*C+c] = 0; 56482b77998SStan Tomov } 56582b77998SStan Tomov for (CeedInt b=0; b<B; b++) { 56682b77998SStan Tomov for (CeedInt c=0; c<C; c++) { 56782b77998SStan Tomov v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 56882b77998SStan Tomov } 56982b77998SStan Tomov } 57082b77998SStan Tomov } 57182b77998SStan Tomov } 57238612b08SStan Tomov #endif 57382b77998SStan Tomov return 0; 57482b77998SStan Tomov } 57582b77998SStan Tomov 57682b77998SStan Tomov static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode, 57782b77998SStan Tomov CeedEvalMode emode, 57882b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 57982b77998SStan Tomov int ierr; 58082b77998SStan Tomov const CeedInt dim = basis->dim; 58182b77998SStan Tomov const CeedInt ndof = basis->ndof; 58282b77998SStan Tomov const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 58382b77998SStan Tomov const CeedInt add = (tmode == CEED_TRANSPOSE); 58482b77998SStan Tomov 58538612b08SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d", 58638612b08SStan Tomov ndof*CeedPowInt(basis->P1d, dim)); 58738612b08SStan Tomov 58882b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 58982b77998SStan Tomov const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 59082b77998SStan Tomov for (CeedInt i = 0; i < vsize; i++) 59182b77998SStan Tomov v[i] = (CeedScalar) 0; 59282b77998SStan Tomov } 59382b77998SStan Tomov if (emode & CEED_EVAL_INTERP) { 59482b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 59582b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 59682b77998SStan Tomov P = basis->Q1d; Q = basis->P1d; 59782b77998SStan Tomov } 59882b77998SStan Tomov CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 59982b77998SStan Tomov CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 60038612b08SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 60138612b08SStan Tomov ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)); 60282b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 60382b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 60482b77998SStan Tomov tmode, add&&(d==dim-1), 60582b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 60682b77998SStan Tomov CeedChk(ierr); 60782b77998SStan Tomov pre /= P; 60882b77998SStan Tomov post *= Q; 60982b77998SStan Tomov } 61082b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 61182b77998SStan Tomov v += nqpt; 61282b77998SStan Tomov } else { 61382b77998SStan Tomov u += nqpt; 61482b77998SStan Tomov } 61582b77998SStan Tomov } 61682b77998SStan Tomov if (emode & CEED_EVAL_GRAD) { 61782b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 61882b77998SStan Tomov // In CEED_NOTRANSPOSE mode: 61982b77998SStan Tomov // u is (P^dim x nc), column-major layout (nc = ndof) 62082b77998SStan Tomov // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 62182b77998SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 62282b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 62382b77998SStan Tomov P = basis->Q1d, Q = basis->P1d; 62482b77998SStan Tomov } 62582b77998SStan Tomov CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 62638612b08SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 62738612b08SStan Tomov ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)); 62882b77998SStan Tomov for (CeedInt p = 0; p < dim; p++) { 62982b77998SStan Tomov CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 63082b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 63182b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 63282b77998SStan Tomov (p==d)?basis->grad1d:basis->interp1d, 63382b77998SStan Tomov tmode, add&&(d==dim-1), 63482b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 63582b77998SStan Tomov CeedChk(ierr); 63682b77998SStan Tomov pre /= P; 63782b77998SStan Tomov post *= Q; 63882b77998SStan Tomov } 63982b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 64082b77998SStan Tomov v += nqpt; 64182b77998SStan Tomov } else { 64282b77998SStan Tomov u += nqpt; 64382b77998SStan Tomov } 64482b77998SStan Tomov } 64582b77998SStan Tomov } 64682b77998SStan Tomov if (emode & CEED_EVAL_WEIGHT) { 64782b77998SStan Tomov if (tmode == CEED_TRANSPOSE) 64882b77998SStan Tomov return CeedError(basis->ceed, 1, 64982b77998SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 65082b77998SStan Tomov CeedInt Q = basis->Q1d; 65182b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 65282b77998SStan Tomov CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 65382b77998SStan Tomov for (CeedInt i=0; i<pre; i++) { 65482b77998SStan Tomov for (CeedInt j=0; j<Q; j++) { 65582b77998SStan Tomov for (CeedInt k=0; k<post; k++) { 65682b77998SStan Tomov v[(i*Q + j)*post + k] = basis->qweight1d[j] 65782b77998SStan Tomov * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 65882b77998SStan Tomov } 65982b77998SStan Tomov } 66082b77998SStan Tomov } 66182b77998SStan Tomov } 66282b77998SStan Tomov } 66382b77998SStan Tomov return 0; 66482b77998SStan Tomov } 66582b77998SStan Tomov 66682b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) { 66782b77998SStan Tomov return 0; 66882b77998SStan Tomov } 66982b77998SStan Tomov 67082b77998SStan Tomov static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d, 67182b77998SStan Tomov CeedInt Q1d, const CeedScalar *interp1d, 67282b77998SStan Tomov const CeedScalar *grad1d, 67382b77998SStan Tomov const CeedScalar *qref1d, 67482b77998SStan Tomov const CeedScalar *qweight1d, 67582b77998SStan Tomov CeedBasis basis) { 67682b77998SStan Tomov basis->Apply = CeedBasisApply_Magma; 67782b77998SStan Tomov basis->Destroy = CeedBasisDestroy_Magma; 67882b77998SStan Tomov return 0; 67982b77998SStan Tomov } 68082b77998SStan Tomov 68182b77998SStan Tomov static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q, 68282b77998SStan Tomov const CeedScalar *const *u, 68382b77998SStan Tomov CeedScalar *const *v) { 68482b77998SStan Tomov int ierr; 68582b77998SStan Tomov ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 68682b77998SStan Tomov return 0; 68782b77998SStan Tomov } 68882b77998SStan Tomov 68982b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 69082b77998SStan Tomov return 0; 69182b77998SStan Tomov } 69282b77998SStan Tomov 69382b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 69482b77998SStan Tomov qf->Apply = CeedQFunctionApply_Magma; 69582b77998SStan Tomov qf->Destroy = CeedQFunctionDestroy_Magma; 69682b77998SStan Tomov return 0; 69782b77998SStan Tomov } 69882b77998SStan Tomov 69982b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) { 70082b77998SStan Tomov CeedOperator_Magma *impl = op->data; 70182b77998SStan Tomov int ierr; 70282b77998SStan Tomov 70382b77998SStan Tomov ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 70482b77998SStan Tomov ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 70582b77998SStan Tomov ierr = CeedFree(&op->data); CeedChk(ierr); 70682b77998SStan Tomov return 0; 70782b77998SStan Tomov } 70882b77998SStan Tomov 70982b77998SStan Tomov static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata, 71082b77998SStan Tomov CeedVector ustate, 71182b77998SStan Tomov CeedVector residual, CeedRequest *request) { 71282b77998SStan Tomov CeedOperator_Magma *impl = op->data; 71382b77998SStan Tomov CeedVector etmp; 71482b77998SStan Tomov CeedInt Q; 71582b77998SStan Tomov const CeedInt nc = op->basis->ndof, dim = op->basis->dim; 71682b77998SStan Tomov CeedScalar *Eu; 71782b77998SStan Tomov char *qd; 71882b77998SStan Tomov int ierr; 71982b77998SStan Tomov CeedTransposeMode lmode = CEED_NOTRANSPOSE; 72082b77998SStan Tomov 72182b77998SStan Tomov if (!impl->etmp) { 72282b77998SStan Tomov ierr = CeedVectorCreate(op->ceed, 72382b77998SStan Tomov nc * op->Erestrict->nelem * op->Erestrict->elemsize, 72482b77998SStan Tomov &impl->etmp); CeedChk(ierr); 72582b77998SStan Tomov // etmp is allocated when CeedVectorGetArray is called below 72682b77998SStan Tomov } 72782b77998SStan Tomov etmp = impl->etmp; 72882b77998SStan Tomov if (op->qf->inmode & ~CEED_EVAL_WEIGHT) { 72982b77998SStan Tomov ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, 73082b77998SStan Tomov nc, lmode, ustate, etmp, 73182b77998SStan Tomov CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 73282b77998SStan Tomov } 73382b77998SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 73482b77998SStan Tomov ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 73582b77998SStan Tomov ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); 73682b77998SStan Tomov CeedChk(ierr); 73738612b08SStan Tomov 73882b77998SStan Tomov for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 73982b77998SStan Tomov CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0}; 74082b77998SStan Tomov const CeedScalar *in[5] = {0,0,0,0,0}; 74182b77998SStan Tomov // TODO: quadrature weights can be computed just once 74238612b08SStan Tomov CeedDebug("\033[11m[CeedOperatorApply_Magma] e=%d: Eu+%d, %d", 74338612b08SStan Tomov e, e*op->Erestrict->elemsize*nc, Q*nc*(dim+2)); 74482b77998SStan Tomov ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 74582b77998SStan Tomov &Eu[e*op->Erestrict->elemsize*nc], BEu); 74682b77998SStan Tomov CeedChk(ierr); 74782b77998SStan Tomov CeedScalar *u_ptr = BEu, *v_ptr = BEv; 74882b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; } 74982b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; } 75082b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; } 75182b77998SStan Tomov if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; } 75282b77998SStan Tomov if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; } 75382b77998SStan Tomov ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 75482b77998SStan Tomov CeedChk(ierr); 75538612b08SStan Tomov CeedDebug("\033[31m[CeedOperatorApply_Magma] e=%d: ",e); 75682b77998SStan Tomov ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 75782b77998SStan Tomov &Eu[e*op->Erestrict->elemsize*nc]); 75882b77998SStan Tomov CeedChk(ierr); 75982b77998SStan Tomov } 76082b77998SStan Tomov ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 76106b636dbSStan Tomov // qdata must be restored 76206b636dbSStan Tomov ierr = CeedVectorRestoreArray(qdata, (CeedScalar**)&qd); CeedChk(ierr); 76382b77998SStan Tomov if (residual) { 76482b77998SStan Tomov CeedScalar *res; 76582b77998SStan Tomov CeedVectorGetArray(residual, CEED_MEM_HOST, &res); 76682b77998SStan Tomov for (int i = 0; i < residual->length; i++) 76782b77998SStan Tomov res[i] = (CeedScalar)0; 76806b636dbSStan Tomov // residual must be restored 76906b636dbSStan Tomov CeedVectorRestoreArray(residual, &res); 77082b77998SStan Tomov ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, 77182b77998SStan Tomov nc, lmode, etmp, residual, 77282b77998SStan Tomov CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 77382b77998SStan Tomov } 77482b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 77582b77998SStan Tomov *request = NULL; 77682b77998SStan Tomov return 0; 77782b77998SStan Tomov } 77882b77998SStan Tomov 77982b77998SStan Tomov static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) { 78082b77998SStan Tomov CeedOperator_Magma *impl = op->data; 78182b77998SStan Tomov int ierr; 78282b77998SStan Tomov 78382b77998SStan Tomov if (!impl->qdata) { 78482b77998SStan Tomov CeedInt Q; 78582b77998SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 78682b77998SStan Tomov ierr = CeedVectorCreate(op->ceed, 78782b77998SStan Tomov op->Erestrict->nelem * Q 78882b77998SStan Tomov * op->qf->qdatasize / sizeof(CeedScalar), 78982b77998SStan Tomov &impl->qdata); CeedChk(ierr); 79082b77998SStan Tomov } 79182b77998SStan Tomov *qdata = impl->qdata; 79282b77998SStan Tomov return 0; 79382b77998SStan Tomov } 79482b77998SStan Tomov 79582b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) { 79682b77998SStan Tomov CeedOperator_Magma *impl; 79782b77998SStan Tomov int ierr; 79882b77998SStan Tomov 79982b77998SStan Tomov ierr = CeedCalloc(1, &impl); CeedChk(ierr); 80082b77998SStan Tomov op->data = impl; 80182b77998SStan Tomov op->Destroy = CeedOperatorDestroy_Magma; 80282b77998SStan Tomov op->Apply = CeedOperatorApply_Magma; 80382b77998SStan Tomov op->GetQData = CeedOperatorGetQData_Magma; 80482b77998SStan Tomov return 0; 80582b77998SStan Tomov } 80682b77998SStan Tomov 80782b77998SStan Tomov // ***************************************************************************** 80882b77998SStan Tomov // * INIT 80982b77998SStan Tomov // ***************************************************************************** 81082b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) { 8111dc2661bSVeselin Dobrev int ierr; 8122a847359SStan Tomov if (strcmp(resource, "/gpu/magma")) 81382b77998SStan Tomov return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 81493fbe329SStan Tomov 8151dc2661bSVeselin Dobrev ierr = magma_init(); 8161dc2661bSVeselin Dobrev if (ierr) return CeedError(ceed, 1, "error in magma_init(): %d\n", ierr); 81793fbe329SStan Tomov //magma_print_environment(); 81893fbe329SStan Tomov 81982b77998SStan Tomov ceed->VecCreate = CeedVectorCreate_Magma; 82082b77998SStan Tomov ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 82182b77998SStan Tomov ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 82282b77998SStan Tomov ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 82382b77998SStan Tomov ceed->OperatorCreate = CeedOperatorCreate_Magma; 82482b77998SStan Tomov return 0; 82582b77998SStan Tomov } 82682b77998SStan Tomov 82782b77998SStan Tomov // ***************************************************************************** 82882b77998SStan Tomov // * REGISTER 82982b77998SStan Tomov // ***************************************************************************** 83082b77998SStan Tomov __attribute__((constructor)) 83182b77998SStan Tomov static void Register(void) { 832*44951fc6Sjeremylt CeedRegister("/gpu/magma", CeedInit_Magma, 20); 83382b77998SStan Tomov } 834