182b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 282b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 382b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details. 482b77998SStan Tomov // 582b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software 682b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral 782b77998SStan Tomov // element discretizations for exascale applications. For more information and 882b77998SStan Tomov // source code availability see http://github.com/ceed. 982b77998SStan Tomov // 1082b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1182b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office 1282b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for 1382b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including 1482b77998SStan Tomov // software, applications, hardware, advanced system engineering and early 1582b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative. 1682b77998SStan Tomov 1782b77998SStan Tomov #include <ceed-impl.h> 1882b77998SStan Tomov #include <string.h> 1993fbe329SStan Tomov #include "magma.h" 2082b77998SStan Tomov 2182b77998SStan Tomov typedef struct { 2282b77998SStan Tomov CeedScalar *array; 2393fbe329SStan Tomov CeedScalar *darray; 24*e2900954SStan Tomov int own_; 2582b77998SStan Tomov } CeedVector_Magma; 2682b77998SStan Tomov 2782b77998SStan Tomov typedef struct { 2882b77998SStan Tomov const CeedInt *indices; 2982b77998SStan Tomov CeedInt *indices_allocated; 3082b77998SStan Tomov } CeedElemRestriction_Magma; 3182b77998SStan Tomov 3282b77998SStan Tomov typedef struct { 3382b77998SStan Tomov CeedVector etmp; 3482b77998SStan Tomov CeedVector qdata; 3582b77998SStan Tomov } CeedOperator_Magma; 3682b77998SStan Tomov 3793fbe329SStan Tomov // ***************************************************************************** 3893fbe329SStan Tomov // * Initialize vector vec (after free mem) with values from array based on cmode 3993fbe329SStan Tomov // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 4093fbe329SStan Tomov // * to array, and data is copied (not store passed pointer) 4193fbe329SStan Tomov // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 4293fbe329SStan Tomov // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 4393fbe329SStan Tomov // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 4493fbe329SStan Tomov // ***************************************************************************** 4582b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 4682b77998SStan Tomov CeedCopyMode cmode, CeedScalar *array) { 4782b77998SStan Tomov CeedVector_Magma *impl = vec->data; 4882b77998SStan Tomov int ierr; 4982b77998SStan Tomov 50*e2900954SStan Tomov // If own data, free that "old" data, e.g., as it may be of different size 51*e2900954SStan Tomov if (impl->own_){ 52*e2900954SStan Tomov magma_free( impl->darray ); 53*e2900954SStan Tomov magma_free_pinned( impl->array ); 54*e2900954SStan Tomov impl->darray = NULL; 55*e2900954SStan Tomov impl->array = NULL; 56*e2900954SStan Tomov impl->own_ = 0; 57*e2900954SStan Tomov } 5893fbe329SStan Tomov 59*e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 60*e2900954SStan Tomov // memory is on the host; own_ = 0 6182b77998SStan Tomov switch (cmode) { 6282b77998SStan Tomov case CEED_COPY_VALUES: 6393fbe329SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 64*e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 65*e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 66*e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 67*e2900954SStan Tomov impl->own_ = 1; 68*e2900954SStan Tomov 6993fbe329SStan Tomov if (array) 7093fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 7193fbe329SStan Tomov array, 1, impl->darray, 1); 7282b77998SStan Tomov break; 7382b77998SStan Tomov case CEED_OWN_POINTER: 7493fbe329SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 75*e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 76*e2900954SStan Tomov impl->array = array; 77*e2900954SStan Tomov impl->own_ = 1; 78*e2900954SStan Tomov 7993fbe329SStan Tomov if (array) 8093fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 8193fbe329SStan Tomov array, 1, impl->darray, 1); 8282b77998SStan Tomov break; 8382b77998SStan Tomov case CEED_USE_POINTER: 8493fbe329SStan Tomov impl->darray = NULL; 8582b77998SStan Tomov impl->array = array; 8682b77998SStan Tomov } 87*e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 88*e2900954SStan Tomov // memory is on the device; own = 0 89*e2900954SStan Tomov switch (cmode) { 90*e2900954SStan Tomov case CEED_COPY_VALUES: 91*e2900954SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 92*e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 93*e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 94*e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 95*e2900954SStan Tomov impl->own_ = 1; 96*e2900954SStan Tomov 97*e2900954SStan Tomov if (array) 98*e2900954SStan Tomov magma_copyvector(vec->length, sizeof(array[0]), 99*e2900954SStan Tomov array, 1, impl->darray, 1); 100*e2900954SStan Tomov break; 101*e2900954SStan Tomov case CEED_OWN_POINTER: 102*e2900954SStan Tomov impl->darray = array; 103*e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 104*e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 105*e2900954SStan Tomov impl->own_ = 1; 106*e2900954SStan Tomov 107*e2900954SStan Tomov break; 108*e2900954SStan Tomov case CEED_USE_POINTER: 109*e2900954SStan Tomov impl->darray = array; 110*e2900954SStan Tomov impl->array = NULL; 111*e2900954SStan Tomov } 112*e2900954SStan Tomov 113*e2900954SStan Tomov } else 114*e2900954SStan Tomov return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 115*e2900954SStan Tomov 11682b77998SStan Tomov return 0; 11782b77998SStan Tomov } 11882b77998SStan Tomov 11993fbe329SStan Tomov // ***************************************************************************** 120*e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) 12193fbe329SStan Tomov // ***************************************************************************** 12282b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 12382b77998SStan Tomov CeedScalar **array) { 12482b77998SStan Tomov CeedVector_Magma *impl = vec->data; 12582b77998SStan Tomov int ierr; 12682b77998SStan Tomov 127*e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 128*e2900954SStan Tomov if (!impl->array){ 129*e2900954SStan Tomov // Allocate data if array is NULL 130*e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 131*e2900954SStan Tomov CeedChk(ierr); 132*e2900954SStan Tomov } else if (impl->own_) { 133*e2900954SStan Tomov // data is owned so GPU had the most up-to-date version; copy it 134*e2900954SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 135*e2900954SStan Tomov impl->darray, 1, impl->array, 1); 13682b77998SStan Tomov } 13782b77998SStan Tomov *array = impl->array; 138*e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 139*e2900954SStan Tomov if (!impl->darray){ 140*e2900954SStan Tomov // Allocate data if darray is NULL 141*e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 142*e2900954SStan Tomov CeedChk(ierr); 143*e2900954SStan Tomov } 144*e2900954SStan Tomov *array = impl->darray; 145*e2900954SStan Tomov } else 146*e2900954SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 147*e2900954SStan Tomov 14882b77998SStan Tomov return 0; 14982b77998SStan Tomov } 15082b77998SStan Tomov 15193fbe329SStan Tomov // ***************************************************************************** 152*e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 15393fbe329SStan Tomov // ***************************************************************************** 15482b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 15582b77998SStan Tomov const CeedScalar **array) { 15682b77998SStan Tomov CeedVector_Magma *impl = vec->data; 15782b77998SStan Tomov int ierr; 15882b77998SStan Tomov 159*e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 160*e2900954SStan Tomov if (!impl->array){ 161*e2900954SStan Tomov // Allocate data if array is NULL 162*e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 163*e2900954SStan Tomov CeedChk(ierr); 164*e2900954SStan Tomov } else if (impl->own_) { 165*e2900954SStan Tomov // data is owned so GPU had the most up-to-date version; copy it 166*e2900954SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 167*e2900954SStan Tomov impl->darray, 1, impl->array, 1); 16882b77998SStan Tomov } 16982b77998SStan Tomov *array = impl->array; 170*e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 171*e2900954SStan Tomov if (!impl->darray){ 172*e2900954SStan Tomov // Allocate data if darray is NULL 173*e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 174*e2900954SStan Tomov CeedChk(ierr); 175*e2900954SStan Tomov } 176*e2900954SStan Tomov *array = impl->darray; 177*e2900954SStan Tomov } else 178*e2900954SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 179*e2900954SStan Tomov 18082b77998SStan Tomov return 0; 18182b77998SStan Tomov } 18282b77998SStan Tomov 18393fbe329SStan Tomov // ***************************************************************************** 184*e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST 185*e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if 186*e2900954SStan Tomov // * it was GPU memory we would not call this routine at all. 18793fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 18893fbe329SStan Tomov // * from vec and possibly modified them. 18993fbe329SStan Tomov // ***************************************************************************** 19082b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 19193fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 19293fbe329SStan Tomov 193*e2900954SStan Tomov if (impl->darray!=NULL) 19493fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 19593fbe329SStan Tomov *array, 1, impl->darray, 1); 196*e2900954SStan Tomov 19782b77998SStan Tomov *array = NULL; 19882b77998SStan Tomov return 0; 19982b77998SStan Tomov } 20082b77998SStan Tomov 20193fbe329SStan Tomov // ***************************************************************************** 202*e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST 203*e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if 204*e2900954SStan Tomov // * it was GPU memory we would not call this routine at all. 20593fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 20693fbe329SStan Tomov // * from vec to only read them; in this case vec may have been modified meanwhile 20793fbe329SStan Tomov // * and needs to be restored here. 20893fbe329SStan Tomov // ***************************************************************************** 20982b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 21082b77998SStan Tomov const CeedScalar **array) { 21193fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 21293fbe329SStan Tomov 213*e2900954SStan Tomov if (impl->darray!=NULL) 21493fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 21593fbe329SStan Tomov *array, 1, impl->darray, 1); 216*e2900954SStan Tomov 21782b77998SStan Tomov *array = NULL; 21882b77998SStan Tomov return 0; 21982b77998SStan Tomov } 22082b77998SStan Tomov 22182b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) { 22282b77998SStan Tomov CeedVector_Magma *impl = vec->data; 22382b77998SStan Tomov int ierr; 22482b77998SStan Tomov 225*e2900954SStan Tomov // Free if we own the data 226*e2900954SStan Tomov if (impl->own_){ 227*e2900954SStan Tomov ierr = magma_free_pinned(impl->array); CeedChk(ierr); 22893fbe329SStan Tomov ierr = magma_free(impl->darray); CeedChk(ierr); 229*e2900954SStan Tomov } 23082b77998SStan Tomov ierr = CeedFree(&vec->data); CeedChk(ierr); 23182b77998SStan Tomov return 0; 23282b77998SStan Tomov } 23382b77998SStan Tomov 23493fbe329SStan Tomov // ***************************************************************************** 23593fbe329SStan Tomov // * Create vector vec of size n 23693fbe329SStan Tomov // ***************************************************************************** 23782b77998SStan Tomov static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) { 23882b77998SStan Tomov CeedVector_Magma *impl; 23982b77998SStan Tomov int ierr; 24082b77998SStan Tomov 24182b77998SStan Tomov vec->SetArray = CeedVectorSetArray_Magma; 24282b77998SStan Tomov vec->GetArray = CeedVectorGetArray_Magma; 24382b77998SStan Tomov vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 24482b77998SStan Tomov vec->RestoreArray = CeedVectorRestoreArray_Magma; 24582b77998SStan Tomov vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 24682b77998SStan Tomov vec->Destroy = CeedVectorDestroy_Magma; 24782b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 24893fbe329SStan Tomov impl->darray = NULL; 249*e2900954SStan Tomov impl->array = NULL; 250*e2900954SStan Tomov impl->own_ = 0; 25182b77998SStan Tomov vec->data = impl; 25282b77998SStan Tomov return 0; 25382b77998SStan Tomov } 25482b77998SStan Tomov 25593fbe329SStan Tomov // ***************************************************************************** 25693fbe329SStan Tomov // * Apply restriction operator r to u: v = r(rmode) u 25793fbe329SStan Tomov // ***************************************************************************** 25882b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 25982b77998SStan Tomov CeedTransposeMode tmode, CeedInt ncomp, 26082b77998SStan Tomov CeedTransposeMode lmode, CeedVector u, 26182b77998SStan Tomov CeedVector v, CeedRequest *request) { 26282b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 26382b77998SStan Tomov int ierr; 26482b77998SStan Tomov const CeedScalar *uu; 26582b77998SStan Tomov CeedScalar *vv; 26682b77998SStan Tomov CeedInt esize = r->nelem*r->elemsize; 267*e2900954SStan Tomov //printf("HELLOOOOOOOOO=======================\n"); 26882b77998SStan Tomov 26982b77998SStan Tomov ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 27082b77998SStan Tomov ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 27182b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 27282b77998SStan Tomov // Perform: v = r * u 27382b77998SStan Tomov if (ncomp == 1) { 27482b77998SStan Tomov for (CeedInt i=0; i<esize; i++) vv[i] = uu[impl->indices[i]]; 275*e2900954SStan Tomov 276*e2900954SStan Tomov /* 277*e2900954SStan Tomov // Works - in t05 x has to be with CEED_COPY_VALUES 278*e2900954SStan Tomov CeedVector_Magma *uimpl = u->data, *vimpl = v->data; 279*e2900954SStan Tomov uu = uimpl->darray; 280*e2900954SStan Tomov vv = vimpl->darray; 281*e2900954SStan Tomov CeedInt *indices;// = (int *)impl->indices; 282*e2900954SStan Tomov magma_malloc( (void**)&indices, esize * sizeof(CeedInt)); 283*e2900954SStan Tomov magma_setvector(esize, sizeof(CeedInt), 284*e2900954SStan Tomov (int *)impl->indices, 1, indices, 1); 285*e2900954SStan Tomov magma_template<<i=0:esize>>(const CeedScalar *uu, CeedScalar *vv, CeedInt *indices) 286*e2900954SStan Tomov { 287*e2900954SStan Tomov vv[i] = uu[indices[i]]; 288*e2900954SStan Tomov } 289*e2900954SStan Tomov 290*e2900954SStan Tomov // printf("HELLOOOOOOOOO....... size = %d\n", esize); 291*e2900954SStan Tomov // 292*e2900954SStan Tomov 293*e2900954SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 294*e2900954SStan Tomov *request = NULL; 295*e2900954SStan Tomov return 0; 296*e2900954SStan Tomov */ 297*e2900954SStan Tomov 29882b77998SStan Tomov } else { 299*e2900954SStan Tomov //printf("HELLOOOOOOOOO-------------\n"); 30082b77998SStan Tomov // vv is (elemsize x ncomp x nelem), column-major 30182b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 30282b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 30382b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 30482b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 30582b77998SStan Tomov vv[i+r->elemsize*(d+ncomp*e)] = 30682b77998SStan Tomov uu[impl->indices[i+r->elemsize*e]+r->ndof*d]; 30782b77998SStan Tomov } 30882b77998SStan Tomov } else { // u is (ncomp x ndof), column-major 30982b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 31082b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 31182b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 31282b77998SStan Tomov vv[i+r->elemsize*(d+ncomp*e)] = 31382b77998SStan Tomov uu[d+ncomp*impl->indices[i+r->elemsize*e]]; 31482b77998SStan Tomov } 31582b77998SStan Tomov } 31682b77998SStan Tomov } 31782b77998SStan Tomov } else { 31882b77998SStan Tomov // Note: in transpose mode, we perform: v += r^t * u 31982b77998SStan Tomov if (ncomp == 1) { 32082b77998SStan Tomov for (CeedInt i=0; i<esize; i++) vv[impl->indices[i]] += uu[i]; 32182b77998SStan Tomov } else { 322*e2900954SStan Tomov //printf("HELLOOOOOOOOO+++++++++++++\n"); 32382b77998SStan Tomov // u is (elemsize x ncomp x nelem) 32482b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 32582b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 32682b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 32782b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 32882b77998SStan Tomov vv[impl->indices[i+r->elemsize*e]+r->ndof*d] += 32982b77998SStan Tomov uu[i+r->elemsize*(d+e*ncomp)]; 33082b77998SStan Tomov } 33182b77998SStan Tomov } else { // vv is (ncomp x ndof), column-major 33282b77998SStan Tomov for (CeedInt e = 0; e < r->nelem; e++) 33382b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 33482b77998SStan Tomov for (CeedInt i=0; i<r->elemsize; i++) { 33582b77998SStan Tomov vv[d+ncomp*impl->indices[i+r->elemsize*e]] += 33682b77998SStan Tomov uu[i+r->elemsize*(d+e*ncomp)]; 33782b77998SStan Tomov } 33882b77998SStan Tomov } 33982b77998SStan Tomov } 34082b77998SStan Tomov } 34182b77998SStan Tomov ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 34282b77998SStan Tomov ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 34382b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 34482b77998SStan Tomov *request = NULL; 34582b77998SStan Tomov return 0; 34682b77998SStan Tomov } 34782b77998SStan Tomov 34882b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 34982b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 35082b77998SStan Tomov int ierr; 35182b77998SStan Tomov 35282b77998SStan Tomov ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 35382b77998SStan Tomov ierr = CeedFree(&r->data); CeedChk(ierr); 35482b77998SStan Tomov return 0; 35582b77998SStan Tomov } 35682b77998SStan Tomov 35782b77998SStan Tomov static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r, 35882b77998SStan Tomov CeedMemType mtype, 35993fbe329SStan Tomov CeedCopyMode cmode, 36093fbe329SStan Tomov const CeedInt *indices) { 36182b77998SStan Tomov int ierr; 36282b77998SStan Tomov CeedElemRestriction_Magma *impl; 36382b77998SStan Tomov 36482b77998SStan Tomov if (mtype != CEED_MEM_HOST) 36582b77998SStan Tomov return CeedError(r->ceed, 1, "Only MemType = HOST supported"); 36682b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 36782b77998SStan Tomov switch (cmode) { 36882b77998SStan Tomov case CEED_COPY_VALUES: 369*e2900954SStan Tomov //printf("CeedElemRestriction_Magma COPY\n"); 37082b77998SStan Tomov ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated); 37182b77998SStan Tomov CeedChk(ierr); 37282b77998SStan Tomov memcpy(impl->indices_allocated, indices, 37382b77998SStan Tomov r->nelem * r->elemsize * sizeof(indices[0])); 37482b77998SStan Tomov impl->indices = impl->indices_allocated; 37582b77998SStan Tomov break; 37682b77998SStan Tomov case CEED_OWN_POINTER: 377*e2900954SStan Tomov //printf("CeedElemRestriction_Magma OWN\n"); 37882b77998SStan Tomov impl->indices_allocated = (CeedInt *)indices; 37982b77998SStan Tomov impl->indices = impl->indices_allocated; 38082b77998SStan Tomov break; 38182b77998SStan Tomov case CEED_USE_POINTER: 382*e2900954SStan Tomov //printf("CeedElemRestriction_Magma USE\n"); 38382b77998SStan Tomov impl->indices = indices; 38482b77998SStan Tomov } 38582b77998SStan Tomov r->data = impl; 38682b77998SStan Tomov r->Apply = CeedElemRestrictionApply_Magma; 38782b77998SStan Tomov r->Destroy = CeedElemRestrictionDestroy_Magma; 38882b77998SStan Tomov return 0; 38982b77998SStan Tomov } 39082b77998SStan Tomov 39182b77998SStan Tomov // Contracts on the middle index 39282b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc 39382b77998SStan Tomov // TRANSPOSE: V_ajc = T_bj U_abc 39482b77998SStan Tomov // If Add != 0, "=" is replaced by "+=" 39582b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed, 39682b77998SStan Tomov CeedInt A, CeedInt B, CeedInt C, CeedInt J, 39782b77998SStan Tomov const CeedScalar *t, CeedTransposeMode tmode, 39882b77998SStan Tomov const CeedInt Add, 39982b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 40082b77998SStan Tomov CeedInt tstride0 = B, tstride1 = 1; 40182b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 40282b77998SStan Tomov tstride0 = 1; tstride1 = J; 40382b77998SStan Tomov } 40482b77998SStan Tomov 40582b77998SStan Tomov for (CeedInt a=0; a<A; a++) { 40682b77998SStan Tomov for (CeedInt j=0; j<J; j++) { 40782b77998SStan Tomov if (!Add) { 40882b77998SStan Tomov for (CeedInt c=0; c<C; c++) 40982b77998SStan Tomov v[(a*J+j)*C+c] = 0; 41082b77998SStan Tomov } 41182b77998SStan Tomov for (CeedInt b=0; b<B; b++) { 41282b77998SStan Tomov for (CeedInt c=0; c<C; c++) { 41382b77998SStan Tomov v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 41482b77998SStan Tomov } 41582b77998SStan Tomov } 41682b77998SStan Tomov } 41782b77998SStan Tomov } 41882b77998SStan Tomov return 0; 41982b77998SStan Tomov } 42082b77998SStan Tomov 42182b77998SStan Tomov static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode, 42282b77998SStan Tomov CeedEvalMode emode, 42382b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 42482b77998SStan Tomov int ierr; 42582b77998SStan Tomov const CeedInt dim = basis->dim; 42682b77998SStan Tomov const CeedInt ndof = basis->ndof; 42782b77998SStan Tomov const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 42882b77998SStan Tomov const CeedInt add = (tmode == CEED_TRANSPOSE); 42982b77998SStan Tomov 43082b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 43182b77998SStan Tomov const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 43282b77998SStan Tomov for (CeedInt i = 0; i < vsize; i++) 43382b77998SStan Tomov v[i] = (CeedScalar) 0; 43482b77998SStan Tomov } 43582b77998SStan Tomov if (emode & CEED_EVAL_INTERP) { 43682b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 43782b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 43882b77998SStan Tomov P = basis->Q1d; Q = basis->P1d; 43982b77998SStan Tomov } 44082b77998SStan Tomov CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 44182b77998SStan Tomov CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 44282b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 44382b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 44482b77998SStan Tomov tmode, add&&(d==dim-1), 44582b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 44682b77998SStan Tomov CeedChk(ierr); 44782b77998SStan Tomov pre /= P; 44882b77998SStan Tomov post *= Q; 44982b77998SStan Tomov } 45082b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 45182b77998SStan Tomov v += nqpt; 45282b77998SStan Tomov } else { 45382b77998SStan Tomov u += nqpt; 45482b77998SStan Tomov } 45582b77998SStan Tomov } 45682b77998SStan Tomov if (emode & CEED_EVAL_GRAD) { 45782b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 45882b77998SStan Tomov // In CEED_NOTRANSPOSE mode: 45982b77998SStan Tomov // u is (P^dim x nc), column-major layout (nc = ndof) 46082b77998SStan Tomov // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 46182b77998SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 46282b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 46382b77998SStan Tomov P = basis->Q1d, Q = basis->P1d; 46482b77998SStan Tomov } 46582b77998SStan Tomov CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 46682b77998SStan Tomov for (CeedInt p = 0; p < dim; p++) { 46782b77998SStan Tomov CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 46882b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 46982b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 47082b77998SStan Tomov (p==d)?basis->grad1d:basis->interp1d, 47182b77998SStan Tomov tmode, add&&(d==dim-1), 47282b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 47382b77998SStan Tomov CeedChk(ierr); 47482b77998SStan Tomov pre /= P; 47582b77998SStan Tomov post *= Q; 47682b77998SStan Tomov } 47782b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 47882b77998SStan Tomov v += nqpt; 47982b77998SStan Tomov } else { 48082b77998SStan Tomov u += nqpt; 48182b77998SStan Tomov } 48282b77998SStan Tomov } 48382b77998SStan Tomov } 48482b77998SStan Tomov if (emode & CEED_EVAL_WEIGHT) { 48582b77998SStan Tomov if (tmode == CEED_TRANSPOSE) 48682b77998SStan Tomov return CeedError(basis->ceed, 1, 48782b77998SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 48882b77998SStan Tomov CeedInt Q = basis->Q1d; 48982b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 49082b77998SStan Tomov CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 49182b77998SStan Tomov for (CeedInt i=0; i<pre; i++) { 49282b77998SStan Tomov for (CeedInt j=0; j<Q; j++) { 49382b77998SStan Tomov for (CeedInt k=0; k<post; k++) { 49482b77998SStan Tomov v[(i*Q + j)*post + k] = basis->qweight1d[j] 49582b77998SStan Tomov * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 49682b77998SStan Tomov } 49782b77998SStan Tomov } 49882b77998SStan Tomov } 49982b77998SStan Tomov } 50082b77998SStan Tomov } 50182b77998SStan Tomov return 0; 50282b77998SStan Tomov } 50382b77998SStan Tomov 50482b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) { 50582b77998SStan Tomov return 0; 50682b77998SStan Tomov } 50782b77998SStan Tomov 50882b77998SStan Tomov static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d, 50982b77998SStan Tomov CeedInt Q1d, const CeedScalar *interp1d, 51082b77998SStan Tomov const CeedScalar *grad1d, 51182b77998SStan Tomov const CeedScalar *qref1d, 51282b77998SStan Tomov const CeedScalar *qweight1d, 51382b77998SStan Tomov CeedBasis basis) { 51482b77998SStan Tomov basis->Apply = CeedBasisApply_Magma; 51582b77998SStan Tomov basis->Destroy = CeedBasisDestroy_Magma; 51682b77998SStan Tomov return 0; 51782b77998SStan Tomov } 51882b77998SStan Tomov 51982b77998SStan Tomov static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q, 52082b77998SStan Tomov const CeedScalar *const *u, 52182b77998SStan Tomov CeedScalar *const *v) { 52282b77998SStan Tomov int ierr; 52382b77998SStan Tomov ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 52482b77998SStan Tomov return 0; 52582b77998SStan Tomov } 52682b77998SStan Tomov 52782b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 52882b77998SStan Tomov return 0; 52982b77998SStan Tomov } 53082b77998SStan Tomov 53182b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 53282b77998SStan Tomov qf->Apply = CeedQFunctionApply_Magma; 53382b77998SStan Tomov qf->Destroy = CeedQFunctionDestroy_Magma; 53482b77998SStan Tomov return 0; 53582b77998SStan Tomov } 53682b77998SStan Tomov 53782b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) { 53882b77998SStan Tomov CeedOperator_Magma *impl = op->data; 53982b77998SStan Tomov int ierr; 54082b77998SStan Tomov 54182b77998SStan Tomov ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 54282b77998SStan Tomov ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 54382b77998SStan Tomov ierr = CeedFree(&op->data); CeedChk(ierr); 54482b77998SStan Tomov return 0; 54582b77998SStan Tomov } 54682b77998SStan Tomov 54782b77998SStan Tomov static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata, 54882b77998SStan Tomov CeedVector ustate, 54982b77998SStan Tomov CeedVector residual, CeedRequest *request) { 55082b77998SStan Tomov CeedOperator_Magma *impl = op->data; 55182b77998SStan Tomov CeedVector etmp; 55282b77998SStan Tomov CeedInt Q; 55382b77998SStan Tomov const CeedInt nc = op->basis->ndof, dim = op->basis->dim; 55482b77998SStan Tomov CeedScalar *Eu; 55582b77998SStan Tomov char *qd; 55682b77998SStan Tomov int ierr; 55782b77998SStan Tomov CeedTransposeMode lmode = CEED_NOTRANSPOSE; 55882b77998SStan Tomov 55982b77998SStan Tomov if (!impl->etmp) { 56082b77998SStan Tomov ierr = CeedVectorCreate(op->ceed, 56182b77998SStan Tomov nc * op->Erestrict->nelem * op->Erestrict->elemsize, 56282b77998SStan Tomov &impl->etmp); CeedChk(ierr); 56382b77998SStan Tomov // etmp is allocated when CeedVectorGetArray is called below 56482b77998SStan Tomov } 56582b77998SStan Tomov etmp = impl->etmp; 56682b77998SStan Tomov if (op->qf->inmode & ~CEED_EVAL_WEIGHT) { 56782b77998SStan Tomov ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, 56882b77998SStan Tomov nc, lmode, ustate, etmp, 56982b77998SStan Tomov CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 57082b77998SStan Tomov } 57182b77998SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 57282b77998SStan Tomov ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 57382b77998SStan Tomov ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); 57482b77998SStan Tomov CeedChk(ierr); 57582b77998SStan Tomov for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 57682b77998SStan Tomov CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0}; 57782b77998SStan Tomov const CeedScalar *in[5] = {0,0,0,0,0}; 57882b77998SStan Tomov // TODO: quadrature weights can be computed just once 57982b77998SStan Tomov ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 58082b77998SStan Tomov &Eu[e*op->Erestrict->elemsize*nc], BEu); 58182b77998SStan Tomov CeedChk(ierr); 58282b77998SStan Tomov CeedScalar *u_ptr = BEu, *v_ptr = BEv; 58382b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; } 58482b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; } 58582b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; } 58682b77998SStan Tomov if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; } 58782b77998SStan Tomov if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; } 58882b77998SStan Tomov ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 58982b77998SStan Tomov CeedChk(ierr); 59082b77998SStan Tomov ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 59182b77998SStan Tomov &Eu[e*op->Erestrict->elemsize*nc]); 59282b77998SStan Tomov CeedChk(ierr); 59382b77998SStan Tomov } 59482b77998SStan Tomov ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 59582b77998SStan Tomov if (residual) { 59682b77998SStan Tomov CeedScalar *res; 59782b77998SStan Tomov CeedVectorGetArray(residual, CEED_MEM_HOST, &res); 59882b77998SStan Tomov for (int i = 0; i < residual->length; i++) 59982b77998SStan Tomov res[i] = (CeedScalar)0; 60082b77998SStan Tomov ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, 60182b77998SStan Tomov nc, lmode, etmp, residual, 60282b77998SStan Tomov CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 60382b77998SStan Tomov } 60482b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 60582b77998SStan Tomov *request = NULL; 60682b77998SStan Tomov return 0; 60782b77998SStan Tomov } 60882b77998SStan Tomov 60982b77998SStan Tomov static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) { 61082b77998SStan Tomov CeedOperator_Magma *impl = op->data; 61182b77998SStan Tomov int ierr; 61282b77998SStan Tomov 61382b77998SStan Tomov if (!impl->qdata) { 61482b77998SStan Tomov CeedInt Q; 61582b77998SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 61682b77998SStan Tomov ierr = CeedVectorCreate(op->ceed, 61782b77998SStan Tomov op->Erestrict->nelem * Q 61882b77998SStan Tomov * op->qf->qdatasize / sizeof(CeedScalar), 61982b77998SStan Tomov &impl->qdata); CeedChk(ierr); 62082b77998SStan Tomov } 62182b77998SStan Tomov *qdata = impl->qdata; 62282b77998SStan Tomov return 0; 62382b77998SStan Tomov } 62482b77998SStan Tomov 62582b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) { 62682b77998SStan Tomov CeedOperator_Magma *impl; 62782b77998SStan Tomov int ierr; 62882b77998SStan Tomov 62982b77998SStan Tomov ierr = CeedCalloc(1, &impl); CeedChk(ierr); 63082b77998SStan Tomov op->data = impl; 63182b77998SStan Tomov op->Destroy = CeedOperatorDestroy_Magma; 63282b77998SStan Tomov op->Apply = CeedOperatorApply_Magma; 63382b77998SStan Tomov op->GetQData = CeedOperatorGetQData_Magma; 63482b77998SStan Tomov return 0; 63582b77998SStan Tomov } 63682b77998SStan Tomov 63782b77998SStan Tomov // ***************************************************************************** 63882b77998SStan Tomov // * INIT 63982b77998SStan Tomov // ***************************************************************************** 64082b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) { 64182b77998SStan Tomov if (strcmp(resource, "/cpu/magma") 64282b77998SStan Tomov && strcmp(resource, "/gpu/magma")) 64382b77998SStan Tomov return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 64493fbe329SStan Tomov 64593fbe329SStan Tomov magma_init(); 64693fbe329SStan Tomov //magma_print_environment(); 64793fbe329SStan Tomov 64882b77998SStan Tomov ceed->VecCreate = CeedVectorCreate_Magma; 64982b77998SStan Tomov ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 65082b77998SStan Tomov ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 65182b77998SStan Tomov ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 65282b77998SStan Tomov ceed->OperatorCreate = CeedOperatorCreate_Magma; 65382b77998SStan Tomov return 0; 65482b77998SStan Tomov } 65582b77998SStan Tomov 65682b77998SStan Tomov // ***************************************************************************** 65782b77998SStan Tomov // * REGISTER 65882b77998SStan Tomov // ***************************************************************************** 65982b77998SStan Tomov __attribute__((constructor)) 66082b77998SStan Tomov static void Register(void) { 66182b77998SStan Tomov CeedRegister("/cpu/magma", CeedInit_Magma); 66282b77998SStan Tomov CeedRegister("/gpu/magma", CeedInit_Magma); 66382b77998SStan Tomov } 664