182b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 282b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 382b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details. 482b77998SStan Tomov // 582b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software 682b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral 782b77998SStan Tomov // element discretizations for exascale applications. For more information and 882b77998SStan Tomov // source code availability see http://github.com/ceed. 982b77998SStan Tomov // 1082b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1182b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office 1282b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for 1382b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including 1482b77998SStan Tomov // software, applications, hardware, advanced system engineering and early 1582b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative. 1682b77998SStan Tomov 1782b77998SStan Tomov #include <ceed-impl.h> 1882b77998SStan Tomov #include <string.h> 1993fbe329SStan Tomov #include "magma.h" 2082b77998SStan Tomov 2182b77998SStan Tomov typedef struct { 2282b77998SStan Tomov CeedScalar *array; 2393fbe329SStan Tomov CeedScalar *darray; 24e2900954SStan Tomov int own_; 2582b77998SStan Tomov } CeedVector_Magma; 2682b77998SStan Tomov 2782b77998SStan Tomov typedef struct { 28*1751b0a9SStan Tomov CeedInt *indices; 29*1751b0a9SStan Tomov CeedInt *dindices; 30*1751b0a9SStan Tomov int own_; 3182b77998SStan Tomov } CeedElemRestriction_Magma; 3282b77998SStan Tomov 3382b77998SStan Tomov typedef struct { 3482b77998SStan Tomov CeedVector etmp; 3582b77998SStan Tomov CeedVector qdata; 3682b77998SStan Tomov } CeedOperator_Magma; 3782b77998SStan Tomov 3893fbe329SStan Tomov // ***************************************************************************** 3993fbe329SStan Tomov // * Initialize vector vec (after free mem) with values from array based on cmode 4093fbe329SStan Tomov // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 4193fbe329SStan Tomov // * to array, and data is copied (not store passed pointer) 4293fbe329SStan Tomov // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 4393fbe329SStan Tomov // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 4493fbe329SStan Tomov // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 4593fbe329SStan Tomov // ***************************************************************************** 4682b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 4782b77998SStan Tomov CeedCopyMode cmode, CeedScalar *array) { 4882b77998SStan Tomov CeedVector_Magma *impl = vec->data; 4982b77998SStan Tomov int ierr; 5082b77998SStan Tomov 51e2900954SStan Tomov // If own data, free that "old" data, e.g., as it may be of different size 52e2900954SStan Tomov if (impl->own_){ 53e2900954SStan Tomov magma_free( impl->darray ); 54e2900954SStan Tomov magma_free_pinned( impl->array ); 55e2900954SStan Tomov impl->darray = NULL; 56e2900954SStan Tomov impl->array = NULL; 57e2900954SStan Tomov impl->own_ = 0; 58e2900954SStan Tomov } 5993fbe329SStan Tomov 60e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 61e2900954SStan Tomov // memory is on the host; own_ = 0 6282b77998SStan Tomov switch (cmode) { 6382b77998SStan Tomov case CEED_COPY_VALUES: 6493fbe329SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 65e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 66e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 67e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 68e2900954SStan Tomov impl->own_ = 1; 69e2900954SStan Tomov 7093fbe329SStan Tomov if (array) 7193fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 7293fbe329SStan Tomov array, 1, impl->darray, 1); 7382b77998SStan Tomov break; 7482b77998SStan Tomov case CEED_OWN_POINTER: 7593fbe329SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 76e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 77*1751b0a9SStan Tomov // TODO: possible problem here is if we are passed non-pinned memory; 78*1751b0a9SStan Tomov // (as we own it, lter in destroy, we use free for pinned memory). 79e2900954SStan Tomov impl->array = array; 80e2900954SStan Tomov impl->own_ = 1; 81e2900954SStan Tomov 8293fbe329SStan Tomov if (array) 8393fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 8493fbe329SStan Tomov array, 1, impl->darray, 1); 8582b77998SStan Tomov break; 8682b77998SStan Tomov case CEED_USE_POINTER: 8793fbe329SStan Tomov impl->darray = NULL; 8882b77998SStan Tomov impl->array = array; 8982b77998SStan Tomov } 90e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 91e2900954SStan Tomov // memory is on the device; own = 0 92e2900954SStan Tomov switch (cmode) { 93e2900954SStan Tomov case CEED_COPY_VALUES: 94e2900954SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 95e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 96e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 97e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 98e2900954SStan Tomov impl->own_ = 1; 99e2900954SStan Tomov 100e2900954SStan Tomov if (array) 101e2900954SStan Tomov magma_copyvector(vec->length, sizeof(array[0]), 102e2900954SStan Tomov array, 1, impl->darray, 1); 103e2900954SStan Tomov break; 104e2900954SStan Tomov case CEED_OWN_POINTER: 105e2900954SStan Tomov impl->darray = array; 106e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 107e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 108e2900954SStan Tomov impl->own_ = 1; 109e2900954SStan Tomov 110e2900954SStan Tomov break; 111e2900954SStan Tomov case CEED_USE_POINTER: 112e2900954SStan Tomov impl->darray = array; 113e2900954SStan Tomov impl->array = NULL; 114e2900954SStan Tomov } 115e2900954SStan Tomov 116e2900954SStan Tomov } else 117e2900954SStan Tomov return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 118e2900954SStan Tomov 11982b77998SStan Tomov return 0; 12082b77998SStan Tomov } 12182b77998SStan Tomov 12293fbe329SStan Tomov // ***************************************************************************** 123e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) 12493fbe329SStan Tomov // ***************************************************************************** 12582b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 12682b77998SStan Tomov CeedScalar **array) { 12782b77998SStan Tomov CeedVector_Magma *impl = vec->data; 12882b77998SStan Tomov int ierr; 12982b77998SStan Tomov 130e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 131e2900954SStan Tomov if (!impl->array){ 132e2900954SStan Tomov // Allocate data if array is NULL 133e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 134e2900954SStan Tomov CeedChk(ierr); 135e2900954SStan Tomov } else if (impl->own_) { 136e2900954SStan Tomov // data is owned so GPU had the most up-to-date version; copy it 137e2900954SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 138e2900954SStan Tomov impl->darray, 1, impl->array, 1); 13982b77998SStan Tomov } 14082b77998SStan Tomov *array = impl->array; 141e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 142e2900954SStan Tomov if (!impl->darray){ 143e2900954SStan Tomov // Allocate data if darray is NULL 144e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 145e2900954SStan Tomov CeedChk(ierr); 146e2900954SStan Tomov } 147e2900954SStan Tomov *array = impl->darray; 148e2900954SStan Tomov } else 149e2900954SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 150e2900954SStan Tomov 15182b77998SStan Tomov return 0; 15282b77998SStan Tomov } 15382b77998SStan Tomov 15493fbe329SStan Tomov // ***************************************************************************** 155e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 15693fbe329SStan Tomov // ***************************************************************************** 15782b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 15882b77998SStan Tomov const CeedScalar **array) { 15982b77998SStan Tomov CeedVector_Magma *impl = vec->data; 16082b77998SStan Tomov int ierr; 16182b77998SStan Tomov 162e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 163e2900954SStan Tomov if (!impl->array){ 164e2900954SStan Tomov // Allocate data if array is NULL 165e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 166e2900954SStan Tomov CeedChk(ierr); 167e2900954SStan Tomov } else if (impl->own_) { 168e2900954SStan Tomov // data is owned so GPU had the most up-to-date version; copy it 169e2900954SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 170e2900954SStan Tomov impl->darray, 1, impl->array, 1); 17182b77998SStan Tomov } 17282b77998SStan Tomov *array = impl->array; 173e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 174e2900954SStan Tomov if (!impl->darray){ 175e2900954SStan Tomov // Allocate data if darray is NULL 176e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 177e2900954SStan Tomov CeedChk(ierr); 178e2900954SStan Tomov } 179e2900954SStan Tomov *array = impl->darray; 180e2900954SStan Tomov } else 181e2900954SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 182e2900954SStan Tomov 18382b77998SStan Tomov return 0; 18482b77998SStan Tomov } 18582b77998SStan Tomov 18693fbe329SStan Tomov // ***************************************************************************** 187e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST 188e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if 189e2900954SStan Tomov // * it was GPU memory we would not call this routine at all. 19093fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 19193fbe329SStan Tomov // * from vec and possibly modified them. 19293fbe329SStan Tomov // ***************************************************************************** 19382b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 19493fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 19593fbe329SStan Tomov 196e2900954SStan Tomov if (impl->darray!=NULL) 19793fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 19893fbe329SStan Tomov *array, 1, impl->darray, 1); 199e2900954SStan Tomov 20082b77998SStan Tomov *array = NULL; 20182b77998SStan Tomov return 0; 20282b77998SStan Tomov } 20382b77998SStan Tomov 20493fbe329SStan Tomov // ***************************************************************************** 205e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST 206e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if 207e2900954SStan Tomov // * it was GPU memory we would not call this routine at all. 20893fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 20993fbe329SStan Tomov // * from vec to only read them; in this case vec may have been modified meanwhile 21093fbe329SStan Tomov // * and needs to be restored here. 21193fbe329SStan Tomov // ***************************************************************************** 21282b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 21382b77998SStan Tomov const CeedScalar **array) { 21493fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 21593fbe329SStan Tomov 216e2900954SStan Tomov if (impl->darray!=NULL) 21793fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 21893fbe329SStan Tomov *array, 1, impl->darray, 1); 219e2900954SStan Tomov 22082b77998SStan Tomov *array = NULL; 22182b77998SStan Tomov return 0; 22282b77998SStan Tomov } 22382b77998SStan Tomov 22482b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) { 22582b77998SStan Tomov CeedVector_Magma *impl = vec->data; 22682b77998SStan Tomov int ierr; 22782b77998SStan Tomov 228e2900954SStan Tomov // Free if we own the data 229e2900954SStan Tomov if (impl->own_){ 230e2900954SStan Tomov ierr = magma_free_pinned(impl->array); CeedChk(ierr); 23193fbe329SStan Tomov ierr = magma_free(impl->darray); CeedChk(ierr); 232e2900954SStan Tomov } 23382b77998SStan Tomov ierr = CeedFree(&vec->data); CeedChk(ierr); 23482b77998SStan Tomov return 0; 23582b77998SStan Tomov } 23682b77998SStan Tomov 23793fbe329SStan Tomov // ***************************************************************************** 23893fbe329SStan Tomov // * Create vector vec of size n 23993fbe329SStan Tomov // ***************************************************************************** 24082b77998SStan Tomov static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) { 24182b77998SStan Tomov CeedVector_Magma *impl; 24282b77998SStan Tomov int ierr; 24382b77998SStan Tomov 24482b77998SStan Tomov vec->SetArray = CeedVectorSetArray_Magma; 24582b77998SStan Tomov vec->GetArray = CeedVectorGetArray_Magma; 24682b77998SStan Tomov vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 24782b77998SStan Tomov vec->RestoreArray = CeedVectorRestoreArray_Magma; 24882b77998SStan Tomov vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 24982b77998SStan Tomov vec->Destroy = CeedVectorDestroy_Magma; 25082b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 25193fbe329SStan Tomov impl->darray = NULL; 252e2900954SStan Tomov impl->array = NULL; 253e2900954SStan Tomov impl->own_ = 0; 25482b77998SStan Tomov vec->data = impl; 25582b77998SStan Tomov return 0; 25682b77998SStan Tomov } 25782b77998SStan Tomov 25893fbe329SStan Tomov // ***************************************************************************** 25993fbe329SStan Tomov // * Apply restriction operator r to u: v = r(rmode) u 26093fbe329SStan Tomov // ***************************************************************************** 26182b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 26282b77998SStan Tomov CeedTransposeMode tmode, CeedInt ncomp, 26382b77998SStan Tomov CeedTransposeMode lmode, CeedVector u, 26482b77998SStan Tomov CeedVector v, CeedRequest *request) { 26582b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 26682b77998SStan Tomov int ierr; 26782b77998SStan Tomov const CeedScalar *uu; 26882b77998SStan Tomov CeedScalar *vv; 26982b77998SStan Tomov CeedInt esize = r->nelem*r->elemsize; 270e2900954SStan Tomov //printf("HELLOOOOOOOOO=======================\n"); 27182b77998SStan Tomov 27282b77998SStan Tomov ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 27382b77998SStan Tomov ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 27482b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 27582b77998SStan Tomov // Perform: v = r * u 27682b77998SStan Tomov if (ncomp == 1) { 27782b77998SStan Tomov for (CeedInt i=0; i<esize; i++) vv[i] = uu[impl->indices[i]]; 278e2900954SStan Tomov 279e2900954SStan Tomov /* 280e2900954SStan Tomov // Works - in t05 x has to be with CEED_COPY_VALUES 281e2900954SStan Tomov CeedVector_Magma *uimpl = u->data, *vimpl = v->data; 282e2900954SStan Tomov uu = uimpl->darray; 283e2900954SStan Tomov vv = vimpl->darray; 284e2900954SStan Tomov CeedInt *indices;// = (int *)impl->indices; 285e2900954SStan Tomov magma_malloc( (void**)&indices, esize * sizeof(CeedInt)); 286e2900954SStan Tomov magma_setvector(esize, sizeof(CeedInt), 287e2900954SStan Tomov (int *)impl->indices, 1, indices, 1); 288e2900954SStan Tomov magma_template<<i=0:esize>>(const CeedScalar *uu, CeedScalar *vv, CeedInt *indices) 289e2900954SStan Tomov { 290e2900954SStan Tomov vv[i] = uu[indices[i]]; 291e2900954SStan Tomov } 292e2900954SStan Tomov 293e2900954SStan Tomov // printf("HELLOOOOOOOOO....... size = %d\n", esize); 294e2900954SStan Tomov // 295e2900954SStan Tomov 296e2900954SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 297e2900954SStan Tomov *request = NULL; 298e2900954SStan Tomov return 0; 299e2900954SStan Tomov */ 300e2900954SStan Tomov 30182b77998SStan Tomov } else { 302e2900954SStan Tomov //printf("HELLOOOOOOOOO-------------\n"); 30382b77998SStan Tomov // vv is (elemsize x ncomp x nelem), column-major 30482b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 30582b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 30682b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 30782b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 30882b77998SStan Tomov vv[i+r->elemsize*(d+ncomp*e)] = 30982b77998SStan Tomov uu[impl->indices[i+r->elemsize*e]+r->ndof*d]; 31082b77998SStan Tomov } 31182b77998SStan Tomov } else { // u is (ncomp x ndof), column-major 31282b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 31382b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 31482b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 31582b77998SStan Tomov vv[i+r->elemsize*(d+ncomp*e)] = 31682b77998SStan Tomov uu[d+ncomp*impl->indices[i+r->elemsize*e]]; 31782b77998SStan Tomov } 31882b77998SStan Tomov } 31982b77998SStan Tomov } 32082b77998SStan Tomov } else { 32182b77998SStan Tomov // Note: in transpose mode, we perform: v += r^t * u 32282b77998SStan Tomov if (ncomp == 1) { 32382b77998SStan Tomov for (CeedInt i=0; i<esize; i++) vv[impl->indices[i]] += uu[i]; 32482b77998SStan Tomov } else { 325e2900954SStan Tomov //printf("HELLOOOOOOOOO+++++++++++++\n"); 32682b77998SStan Tomov // u is (elemsize x ncomp x nelem) 32782b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 32882b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 32982b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 33082b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 33182b77998SStan Tomov vv[impl->indices[i+r->elemsize*e]+r->ndof*d] += 33282b77998SStan Tomov uu[i+r->elemsize*(d+e*ncomp)]; 33382b77998SStan Tomov } 33482b77998SStan Tomov } else { // vv is (ncomp x ndof), column-major 33582b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 33682b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 33782b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 33882b77998SStan Tomov vv[d+ncomp*impl->indices[i+r->elemsize*e]] += 33982b77998SStan Tomov uu[i+r->elemsize*(d+e*ncomp)]; 34082b77998SStan Tomov } 34182b77998SStan Tomov } 34282b77998SStan Tomov } 34382b77998SStan Tomov } 34482b77998SStan Tomov ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 34582b77998SStan Tomov ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 34682b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 34782b77998SStan Tomov *request = NULL; 34882b77998SStan Tomov return 0; 34982b77998SStan Tomov } 35082b77998SStan Tomov 35182b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 35282b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 35382b77998SStan Tomov int ierr; 35482b77998SStan Tomov 355*1751b0a9SStan Tomov // Free if we own the data 356*1751b0a9SStan Tomov if (impl->own_){ 357*1751b0a9SStan Tomov ierr = magma_free_pinned(impl->indices); CeedChk(ierr); 358*1751b0a9SStan Tomov ierr = magma_free(impl->dindices); CeedChk(ierr); 359*1751b0a9SStan Tomov } 36082b77998SStan Tomov ierr = CeedFree(&r->data); CeedChk(ierr); 36182b77998SStan Tomov return 0; 36282b77998SStan Tomov } 36382b77998SStan Tomov 36482b77998SStan Tomov static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r, 36582b77998SStan Tomov CeedMemType mtype, 36693fbe329SStan Tomov CeedCopyMode cmode, 36793fbe329SStan Tomov const CeedInt *indices) { 368*1751b0a9SStan Tomov int ierr, size = r->nelem*r->elemsize; 36982b77998SStan Tomov CeedElemRestriction_Magma *impl; 37082b77998SStan Tomov 371*1751b0a9SStan Tomov // Allocate memory for the MAGMA Restricton and initializa pointers to NULL 37282b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 373*1751b0a9SStan Tomov impl->dindices = NULL; 374*1751b0a9SStan Tomov impl->indices = NULL; 375*1751b0a9SStan Tomov impl->own_ = 0; 376*1751b0a9SStan Tomov 377*1751b0a9SStan Tomov if (mtype == CEED_MEM_HOST) { 378*1751b0a9SStan Tomov // memory is on the host; own_ = 0 37982b77998SStan Tomov switch (cmode) { 38082b77998SStan Tomov case CEED_COPY_VALUES: 381*1751b0a9SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 382*1751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 383*1751b0a9SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->indices, 384*1751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 385*1751b0a9SStan Tomov impl->own_ = 1; 386*1751b0a9SStan Tomov 387*1751b0a9SStan Tomov if (indices) 388*1751b0a9SStan Tomov magma_setvector(size, sizeof(CeedInt), 389*1751b0a9SStan Tomov indices, 1, impl->dindices, 1); 39082b77998SStan Tomov break; 39182b77998SStan Tomov case CEED_OWN_POINTER: 392*1751b0a9SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 393*1751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 394*1751b0a9SStan Tomov // TODO: possible problem here is if we are passed non-pinned memory; 395*1751b0a9SStan Tomov // (as we own it, lter in destroy, we use free for pinned memory). 396*1751b0a9SStan Tomov impl->indices = indices; 397*1751b0a9SStan Tomov impl->own_ = 1; 398*1751b0a9SStan Tomov 399*1751b0a9SStan Tomov if (indices) 400*1751b0a9SStan Tomov magma_setvector(size, sizeof(CeedInt), 401*1751b0a9SStan Tomov indices, 1, impl->dindices, 1); 40282b77998SStan Tomov break; 40382b77998SStan Tomov case CEED_USE_POINTER: 404*1751b0a9SStan Tomov impl->dindices = NULL; 40582b77998SStan Tomov impl->indices = indices; 40682b77998SStan Tomov } 407*1751b0a9SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 408*1751b0a9SStan Tomov // memory is on the device; own = 0 409*1751b0a9SStan Tomov switch (cmode) { 410*1751b0a9SStan Tomov case CEED_COPY_VALUES: 411*1751b0a9SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 412*1751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 413*1751b0a9SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->indices, 414*1751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 415*1751b0a9SStan Tomov impl->own_ = 1; 416*1751b0a9SStan Tomov 417*1751b0a9SStan Tomov if (indices) 418*1751b0a9SStan Tomov magma_copyvector(size, sizeof(CeedInt), 419*1751b0a9SStan Tomov indices, 1, impl->dindices, 1); 420*1751b0a9SStan Tomov break; 421*1751b0a9SStan Tomov case CEED_OWN_POINTER: 422*1751b0a9SStan Tomov impl->dindices = indices; 423*1751b0a9SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->indices, 424*1751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 425*1751b0a9SStan Tomov impl->own_ = 1; 426*1751b0a9SStan Tomov 427*1751b0a9SStan Tomov break; 428*1751b0a9SStan Tomov case CEED_USE_POINTER: 429*1751b0a9SStan Tomov impl->dindices = indices; 430*1751b0a9SStan Tomov impl->indices = NULL; 431*1751b0a9SStan Tomov } 432*1751b0a9SStan Tomov 433*1751b0a9SStan Tomov } else 434*1751b0a9SStan Tomov return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported"); 435*1751b0a9SStan Tomov 43682b77998SStan Tomov r->data = impl; 43782b77998SStan Tomov r->Apply = CeedElemRestrictionApply_Magma; 43882b77998SStan Tomov r->Destroy = CeedElemRestrictionDestroy_Magma; 439*1751b0a9SStan Tomov 44082b77998SStan Tomov return 0; 44182b77998SStan Tomov } 44282b77998SStan Tomov 44382b77998SStan Tomov // Contracts on the middle index 44482b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc 44582b77998SStan Tomov // TRANSPOSE: V_ajc = T_bj U_abc 44682b77998SStan Tomov // If Add != 0, "=" is replaced by "+=" 44782b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed, 44882b77998SStan Tomov CeedInt A, CeedInt B, CeedInt C, CeedInt J, 44982b77998SStan Tomov const CeedScalar *t, CeedTransposeMode tmode, 45082b77998SStan Tomov const CeedInt Add, 45182b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 45282b77998SStan Tomov CeedInt tstride0 = B, tstride1 = 1; 45382b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 45482b77998SStan Tomov tstride0 = 1; tstride1 = J; 45582b77998SStan Tomov } 45682b77998SStan Tomov 45782b77998SStan Tomov for (CeedInt a=0; a<A; a++) { 45882b77998SStan Tomov for (CeedInt j=0; j<J; j++) { 45982b77998SStan Tomov if (!Add) { 46082b77998SStan Tomov for (CeedInt c=0; c<C; c++) 46182b77998SStan Tomov v[(a*J+j)*C+c] = 0; 46282b77998SStan Tomov } 46382b77998SStan Tomov for (CeedInt b=0; b<B; b++) { 46482b77998SStan Tomov for (CeedInt c=0; c<C; c++) { 46582b77998SStan Tomov v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 46682b77998SStan Tomov } 46782b77998SStan Tomov } 46882b77998SStan Tomov } 46982b77998SStan Tomov } 47082b77998SStan Tomov return 0; 47182b77998SStan Tomov } 47282b77998SStan Tomov 47382b77998SStan Tomov static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode, 47482b77998SStan Tomov CeedEvalMode emode, 47582b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 47682b77998SStan Tomov int ierr; 47782b77998SStan Tomov const CeedInt dim = basis->dim; 47882b77998SStan Tomov const CeedInt ndof = basis->ndof; 47982b77998SStan Tomov const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 48082b77998SStan Tomov const CeedInt add = (tmode == CEED_TRANSPOSE); 48182b77998SStan Tomov 48282b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 48382b77998SStan Tomov const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 48482b77998SStan Tomov for (CeedInt i = 0; i < vsize; i++) 48582b77998SStan Tomov v[i] = (CeedScalar) 0; 48682b77998SStan Tomov } 48782b77998SStan Tomov if (emode & CEED_EVAL_INTERP) { 48882b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 48982b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 49082b77998SStan Tomov P = basis->Q1d; Q = basis->P1d; 49182b77998SStan Tomov } 49282b77998SStan Tomov CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 49382b77998SStan Tomov CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 49482b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 49582b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 49682b77998SStan Tomov tmode, add&&(d==dim-1), 49782b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 49882b77998SStan Tomov CeedChk(ierr); 49982b77998SStan Tomov pre /= P; 50082b77998SStan Tomov post *= Q; 50182b77998SStan Tomov } 50282b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 50382b77998SStan Tomov v += nqpt; 50482b77998SStan Tomov } else { 50582b77998SStan Tomov u += nqpt; 50682b77998SStan Tomov } 50782b77998SStan Tomov } 50882b77998SStan Tomov if (emode & CEED_EVAL_GRAD) { 50982b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 51082b77998SStan Tomov // In CEED_NOTRANSPOSE mode: 51182b77998SStan Tomov // u is (P^dim x nc), column-major layout (nc = ndof) 51282b77998SStan Tomov // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 51382b77998SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 51482b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 51582b77998SStan Tomov P = basis->Q1d, Q = basis->P1d; 51682b77998SStan Tomov } 51782b77998SStan Tomov CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 51882b77998SStan Tomov for (CeedInt p = 0; p < dim; p++) { 51982b77998SStan Tomov CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 52082b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 52182b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 52282b77998SStan Tomov (p==d)?basis->grad1d:basis->interp1d, 52382b77998SStan Tomov tmode, add&&(d==dim-1), 52482b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 52582b77998SStan Tomov CeedChk(ierr); 52682b77998SStan Tomov pre /= P; 52782b77998SStan Tomov post *= Q; 52882b77998SStan Tomov } 52982b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 53082b77998SStan Tomov v += nqpt; 53182b77998SStan Tomov } else { 53282b77998SStan Tomov u += nqpt; 53382b77998SStan Tomov } 53482b77998SStan Tomov } 53582b77998SStan Tomov } 53682b77998SStan Tomov if (emode & CEED_EVAL_WEIGHT) { 53782b77998SStan Tomov if (tmode == CEED_TRANSPOSE) 53882b77998SStan Tomov return CeedError(basis->ceed, 1, 53982b77998SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 54082b77998SStan Tomov CeedInt Q = basis->Q1d; 54182b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 54282b77998SStan Tomov CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 54382b77998SStan Tomov for (CeedInt i=0; i<pre; i++) { 54482b77998SStan Tomov for (CeedInt j=0; j<Q; j++) { 54582b77998SStan Tomov for (CeedInt k=0; k<post; k++) { 54682b77998SStan Tomov v[(i*Q + j)*post + k] = basis->qweight1d[j] 54782b77998SStan Tomov * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 54882b77998SStan Tomov } 54982b77998SStan Tomov } 55082b77998SStan Tomov } 55182b77998SStan Tomov } 55282b77998SStan Tomov } 55382b77998SStan Tomov return 0; 55482b77998SStan Tomov } 55582b77998SStan Tomov 55682b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) { 55782b77998SStan Tomov return 0; 55882b77998SStan Tomov } 55982b77998SStan Tomov 56082b77998SStan Tomov static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d, 56182b77998SStan Tomov CeedInt Q1d, const CeedScalar *interp1d, 56282b77998SStan Tomov const CeedScalar *grad1d, 56382b77998SStan Tomov const CeedScalar *qref1d, 56482b77998SStan Tomov const CeedScalar *qweight1d, 56582b77998SStan Tomov CeedBasis basis) { 56682b77998SStan Tomov basis->Apply = CeedBasisApply_Magma; 56782b77998SStan Tomov basis->Destroy = CeedBasisDestroy_Magma; 56882b77998SStan Tomov return 0; 56982b77998SStan Tomov } 57082b77998SStan Tomov 57182b77998SStan Tomov static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q, 57282b77998SStan Tomov const CeedScalar *const *u, 57382b77998SStan Tomov CeedScalar *const *v) { 57482b77998SStan Tomov int ierr; 57582b77998SStan Tomov ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 57682b77998SStan Tomov return 0; 57782b77998SStan Tomov } 57882b77998SStan Tomov 57982b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 58082b77998SStan Tomov return 0; 58182b77998SStan Tomov } 58282b77998SStan Tomov 58382b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 58482b77998SStan Tomov qf->Apply = CeedQFunctionApply_Magma; 58582b77998SStan Tomov qf->Destroy = CeedQFunctionDestroy_Magma; 58682b77998SStan Tomov return 0; 58782b77998SStan Tomov } 58882b77998SStan Tomov 58982b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) { 59082b77998SStan Tomov CeedOperator_Magma *impl = op->data; 59182b77998SStan Tomov int ierr; 59282b77998SStan Tomov 59382b77998SStan Tomov ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 59482b77998SStan Tomov ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 59582b77998SStan Tomov ierr = CeedFree(&op->data); CeedChk(ierr); 59682b77998SStan Tomov return 0; 59782b77998SStan Tomov } 59882b77998SStan Tomov 59982b77998SStan Tomov static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata, 60082b77998SStan Tomov CeedVector ustate, 60182b77998SStan Tomov CeedVector residual, CeedRequest *request) { 60282b77998SStan Tomov CeedOperator_Magma *impl = op->data; 60382b77998SStan Tomov CeedVector etmp; 60482b77998SStan Tomov CeedInt Q; 60582b77998SStan Tomov const CeedInt nc = op->basis->ndof, dim = op->basis->dim; 60682b77998SStan Tomov CeedScalar *Eu; 60782b77998SStan Tomov char *qd; 60882b77998SStan Tomov int ierr; 60982b77998SStan Tomov CeedTransposeMode lmode = CEED_NOTRANSPOSE; 61082b77998SStan Tomov 61182b77998SStan Tomov if (!impl->etmp) { 61282b77998SStan Tomov ierr = CeedVectorCreate(op->ceed, 61382b77998SStan Tomov nc * op->Erestrict->nelem * op->Erestrict->elemsize, 61482b77998SStan Tomov &impl->etmp); CeedChk(ierr); 61582b77998SStan Tomov // etmp is allocated when CeedVectorGetArray is called below 61682b77998SStan Tomov } 61782b77998SStan Tomov etmp = impl->etmp; 61882b77998SStan Tomov if (op->qf->inmode & ~CEED_EVAL_WEIGHT) { 61982b77998SStan Tomov ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, 62082b77998SStan Tomov nc, lmode, ustate, etmp, 62182b77998SStan Tomov CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 62282b77998SStan Tomov } 62382b77998SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 62482b77998SStan Tomov ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 62582b77998SStan Tomov ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); 62682b77998SStan Tomov CeedChk(ierr); 62782b77998SStan Tomov for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 62882b77998SStan Tomov CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0}; 62982b77998SStan Tomov const CeedScalar *in[5] = {0,0,0,0,0}; 63082b77998SStan Tomov // TODO: quadrature weights can be computed just once 63182b77998SStan Tomov ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 63282b77998SStan Tomov &Eu[e*op->Erestrict->elemsize*nc], BEu); 63382b77998SStan Tomov CeedChk(ierr); 63482b77998SStan Tomov CeedScalar *u_ptr = BEu, *v_ptr = BEv; 63582b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; } 63682b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; } 63782b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; } 63882b77998SStan Tomov if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; } 63982b77998SStan Tomov if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; } 64082b77998SStan Tomov ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 64182b77998SStan Tomov CeedChk(ierr); 64282b77998SStan Tomov ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 64382b77998SStan Tomov &Eu[e*op->Erestrict->elemsize*nc]); 64482b77998SStan Tomov CeedChk(ierr); 64582b77998SStan Tomov } 64682b77998SStan Tomov ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 64782b77998SStan Tomov if (residual) { 64882b77998SStan Tomov CeedScalar *res; 64982b77998SStan Tomov CeedVectorGetArray(residual, CEED_MEM_HOST, &res); 65082b77998SStan Tomov for (int i = 0; i < residual->length; i++) 65182b77998SStan Tomov res[i] = (CeedScalar)0; 65282b77998SStan Tomov ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, 65382b77998SStan Tomov nc, lmode, etmp, residual, 65482b77998SStan Tomov CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 65582b77998SStan Tomov } 65682b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 65782b77998SStan Tomov *request = NULL; 65882b77998SStan Tomov return 0; 65982b77998SStan Tomov } 66082b77998SStan Tomov 66182b77998SStan Tomov static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) { 66282b77998SStan Tomov CeedOperator_Magma *impl = op->data; 66382b77998SStan Tomov int ierr; 66482b77998SStan Tomov 66582b77998SStan Tomov if (!impl->qdata) { 66682b77998SStan Tomov CeedInt Q; 66782b77998SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 66882b77998SStan Tomov ierr = CeedVectorCreate(op->ceed, 66982b77998SStan Tomov op->Erestrict->nelem * Q 67082b77998SStan Tomov * op->qf->qdatasize / sizeof(CeedScalar), 67182b77998SStan Tomov &impl->qdata); CeedChk(ierr); 67282b77998SStan Tomov } 67382b77998SStan Tomov *qdata = impl->qdata; 67482b77998SStan Tomov return 0; 67582b77998SStan Tomov } 67682b77998SStan Tomov 67782b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) { 67882b77998SStan Tomov CeedOperator_Magma *impl; 67982b77998SStan Tomov int ierr; 68082b77998SStan Tomov 68182b77998SStan Tomov ierr = CeedCalloc(1, &impl); CeedChk(ierr); 68282b77998SStan Tomov op->data = impl; 68382b77998SStan Tomov op->Destroy = CeedOperatorDestroy_Magma; 68482b77998SStan Tomov op->Apply = CeedOperatorApply_Magma; 68582b77998SStan Tomov op->GetQData = CeedOperatorGetQData_Magma; 68682b77998SStan Tomov return 0; 68782b77998SStan Tomov } 68882b77998SStan Tomov 68982b77998SStan Tomov // ***************************************************************************** 69082b77998SStan Tomov // * INIT 69182b77998SStan Tomov // ***************************************************************************** 69282b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) { 69382b77998SStan Tomov if (strcmp(resource, "/cpu/magma") 69482b77998SStan Tomov && strcmp(resource, "/gpu/magma")) 69582b77998SStan Tomov return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 69693fbe329SStan Tomov 69793fbe329SStan Tomov magma_init(); 69893fbe329SStan Tomov //magma_print_environment(); 69993fbe329SStan Tomov 70082b77998SStan Tomov ceed->VecCreate = CeedVectorCreate_Magma; 70182b77998SStan Tomov ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 70282b77998SStan Tomov ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 70382b77998SStan Tomov ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 70482b77998SStan Tomov ceed->OperatorCreate = CeedOperatorCreate_Magma; 70582b77998SStan Tomov return 0; 70682b77998SStan Tomov } 70782b77998SStan Tomov 70882b77998SStan Tomov // ***************************************************************************** 70982b77998SStan Tomov // * REGISTER 71082b77998SStan Tomov // ***************************************************************************** 71182b77998SStan Tomov __attribute__((constructor)) 71282b77998SStan Tomov static void Register(void) { 71382b77998SStan Tomov CeedRegister("/cpu/magma", CeedInit_Magma); 71482b77998SStan Tomov CeedRegister("/gpu/magma", CeedInit_Magma); 71582b77998SStan Tomov } 716