182b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 282b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 382b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details. 482b77998SStan Tomov // 582b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software 682b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral 782b77998SStan Tomov // element discretizations for exascale applications. For more information and 882b77998SStan Tomov // source code availability see http://github.com/ceed. 982b77998SStan Tomov // 1082b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1182b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office 1282b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for 1382b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including 1482b77998SStan Tomov // software, applications, hardware, advanced system engineering and early 1582b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative. 1682b77998SStan Tomov 1738612b08SStan Tomov #include "ceed-magma.h" 18*5a9ca9adSVeselin Dobrev #include <string.h> 1982b77998SStan Tomov 2082b77998SStan Tomov typedef struct { 2182b77998SStan Tomov CeedScalar *array; 2293fbe329SStan Tomov CeedScalar *darray; 23e2900954SStan Tomov int own_; 24c119dad6SStan Tomov int down_; 2582b77998SStan Tomov } CeedVector_Magma; 2682b77998SStan Tomov 2782b77998SStan Tomov typedef struct { 281751b0a9SStan Tomov CeedInt *indices; 291751b0a9SStan Tomov CeedInt *dindices; 301751b0a9SStan Tomov int own_; 31c119dad6SStan Tomov int down_; // cover a case where we own Device memory 3282b77998SStan Tomov } CeedElemRestriction_Magma; 3382b77998SStan Tomov 3482b77998SStan Tomov typedef struct { 3582b77998SStan Tomov CeedVector etmp; 3682b77998SStan Tomov CeedVector qdata; 3782b77998SStan Tomov } CeedOperator_Magma; 3882b77998SStan Tomov 3993fbe329SStan Tomov // ***************************************************************************** 4093fbe329SStan Tomov // * Initialize vector vec (after free mem) with values from array based on cmode 4193fbe329SStan Tomov // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 4293fbe329SStan Tomov // * to array, and data is copied (not store passed pointer) 4393fbe329SStan Tomov // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 4493fbe329SStan Tomov // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 4593fbe329SStan Tomov // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 4693fbe329SStan Tomov // ***************************************************************************** 4782b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 4882b77998SStan Tomov CeedCopyMode cmode, CeedScalar *array) { 4982b77998SStan Tomov CeedVector_Magma *impl = vec->data; 5082b77998SStan Tomov int ierr; 5182b77998SStan Tomov 52c119dad6SStan Tomov // If own data, free the "old" data, e.g., as it may be of different size 53e2900954SStan Tomov if (impl->own_) { 54e2900954SStan Tomov magma_free( impl->darray ); 55e2900954SStan Tomov magma_free_pinned( impl->array ); 56e2900954SStan Tomov impl->darray = NULL; 57e2900954SStan Tomov impl->array = NULL; 58e2900954SStan Tomov impl->own_ = 0; 59c119dad6SStan Tomov impl->down_= 0; 60e2900954SStan Tomov } 6193fbe329SStan Tomov 62e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 63e2900954SStan Tomov // memory is on the host; own_ = 0 6482b77998SStan Tomov switch (cmode) { 6582b77998SStan Tomov case CEED_COPY_VALUES: 6693fbe329SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 67e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 68e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 69e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 70e2900954SStan Tomov impl->own_ = 1; 71e2900954SStan Tomov 72c119dad6SStan Tomov if (array != NULL) 7393fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 7493fbe329SStan Tomov array, 1, impl->darray, 1); 7582b77998SStan Tomov break; 7682b77998SStan Tomov case CEED_OWN_POINTER: 7793fbe329SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 78e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 791751b0a9SStan Tomov // TODO: possible problem here is if we are passed non-pinned memory; 801751b0a9SStan Tomov // (as we own it, lter in destroy, we use free for pinned memory). 81e2900954SStan Tomov impl->array = array; 82e2900954SStan Tomov impl->own_ = 1; 83e2900954SStan Tomov 84c119dad6SStan Tomov if (array != NULL) 8593fbe329SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 8693fbe329SStan Tomov array, 1, impl->darray, 1); 8782b77998SStan Tomov break; 8882b77998SStan Tomov case CEED_USE_POINTER: 89c119dad6SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 90c119dad6SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 91c119dad6SStan Tomov magma_setvector(vec->length, sizeof(array[0]), 92c119dad6SStan Tomov array, 1, impl->darray, 1); 9382b77998SStan Tomov impl->array = array; 9482b77998SStan Tomov } 95e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 96e2900954SStan Tomov // memory is on the device; own = 0 97e2900954SStan Tomov switch (cmode) { 98e2900954SStan Tomov case CEED_COPY_VALUES: 99e2900954SStan Tomov ierr = magma_malloc( (void**)&impl->darray, 100e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 101e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 102e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 103e2900954SStan Tomov impl->own_ = 1; 104e2900954SStan Tomov 105e2900954SStan Tomov if (array) 106e2900954SStan Tomov magma_copyvector(vec->length, sizeof(array[0]), 107e2900954SStan Tomov array, 1, impl->darray, 1); 108e2900954SStan Tomov break; 109e2900954SStan Tomov case CEED_OWN_POINTER: 110e2900954SStan Tomov impl->darray = array; 111e2900954SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->array, 112e2900954SStan Tomov vec->length * sizeof(CeedScalar)); CeedChk(ierr); 113e2900954SStan Tomov impl->own_ = 1; 114e2900954SStan Tomov 115e2900954SStan Tomov break; 116e2900954SStan Tomov case CEED_USE_POINTER: 117e2900954SStan Tomov impl->darray = array; 118e2900954SStan Tomov impl->array = NULL; 119e2900954SStan Tomov } 120e2900954SStan Tomov 121e2900954SStan Tomov } else 122e2900954SStan Tomov return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 123e2900954SStan Tomov 12482b77998SStan Tomov return 0; 12582b77998SStan Tomov } 12682b77998SStan Tomov 12793fbe329SStan Tomov // ***************************************************************************** 128e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) 12993fbe329SStan Tomov // ***************************************************************************** 13082b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 13182b77998SStan Tomov CeedScalar **array) { 13282b77998SStan Tomov CeedVector_Magma *impl = vec->data; 13382b77998SStan Tomov int ierr; 13482b77998SStan Tomov 135e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 136c119dad6SStan Tomov if (impl->own_) { 137e2900954SStan Tomov // data is owned so GPU had the most up-to-date version; copy it 138e2900954SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 139e2900954SStan Tomov impl->darray, 1, impl->array, 1); 140c119dad6SStan Tomov } else if (impl->array == NULL) { 141c119dad6SStan Tomov // Vector doesn't own the data and was set on GPU 142c119dad6SStan Tomov if (impl->darray == NULL) { 143c119dad6SStan Tomov // call was made just to allocate memory 144c119dad6SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 145c119dad6SStan Tomov CeedChk(ierr); 146c119dad6SStan Tomov } else 147c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 14882b77998SStan Tomov } 14982b77998SStan Tomov *array = impl->array; 150e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 151c119dad6SStan Tomov if (impl->darray == NULL) { 152c119dad6SStan Tomov // Vector doesn't own the data and was set on the CPU 153c119dad6SStan Tomov if (impl->array == NULL) { 154c119dad6SStan Tomov // call was made just to allocate memory 155e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 156e2900954SStan Tomov CeedChk(ierr); 157c119dad6SStan Tomov } else 158c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 159e2900954SStan Tomov } 160e2900954SStan Tomov *array = impl->darray; 161e2900954SStan Tomov } else 162e2900954SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 163e2900954SStan Tomov 16482b77998SStan Tomov return 0; 16582b77998SStan Tomov } 16682b77998SStan Tomov 16793fbe329SStan Tomov // ***************************************************************************** 168e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 16993fbe329SStan Tomov // ***************************************************************************** 17082b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 17182b77998SStan Tomov const CeedScalar **array) { 17282b77998SStan Tomov CeedVector_Magma *impl = vec->data; 17382b77998SStan Tomov int ierr; 17482b77998SStan Tomov 175e2900954SStan Tomov if (mtype == CEED_MEM_HOST) { 176c119dad6SStan Tomov if (impl->own_) { 177e2900954SStan Tomov // data is owned so GPU had the most up-to-date version; copy it 178e2900954SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 179e2900954SStan Tomov impl->darray, 1, impl->array, 1); 180c119dad6SStan Tomov } else if (impl->array == NULL) { 181c119dad6SStan Tomov // Vector doesn't own the data and was set on GPU 182c119dad6SStan Tomov if (impl->darray == NULL) { 183c119dad6SStan Tomov // call was made just to allocate memory 184c119dad6SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 185c119dad6SStan Tomov CeedChk(ierr); 186c119dad6SStan Tomov } else 187c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 18882b77998SStan Tomov } 18982b77998SStan Tomov *array = impl->array; 190e2900954SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 191c119dad6SStan Tomov if (impl->darray == NULL) { 192c119dad6SStan Tomov // Vector doesn't own the data and was set on the CPU 193c119dad6SStan Tomov if (impl->array == NULL) { 194c119dad6SStan Tomov // call was made just to allocate memory 195e2900954SStan Tomov ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 196e2900954SStan Tomov CeedChk(ierr); 197c119dad6SStan Tomov } else 198c119dad6SStan Tomov return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 199e2900954SStan Tomov } 200e2900954SStan Tomov *array = impl->darray; 201e2900954SStan Tomov } else 202e2900954SStan Tomov return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 203e2900954SStan Tomov 20482b77998SStan Tomov return 0; 20582b77998SStan Tomov } 20682b77998SStan Tomov 20793fbe329SStan Tomov // ***************************************************************************** 208e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST 209e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if 210e2900954SStan Tomov // * it was GPU memory we would not call this routine at all. 21193fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 21293fbe329SStan Tomov // * from vec and possibly modified them. 21393fbe329SStan Tomov // ***************************************************************************** 21482b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 21593fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 21693fbe329SStan Tomov 217c119dad6SStan Tomov // Check if the array is a CPU pointer 218c119dad6SStan Tomov if (*array == impl->array) { 219c119dad6SStan Tomov // Update device, if the device pointer is not NULL 220c119dad6SStan Tomov if (impl->darray != NULL) { 22193fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 22293fbe329SStan Tomov *array, 1, impl->darray, 1); 223*5a9ca9adSVeselin Dobrev } else { 224*5a9ca9adSVeselin Dobrev // nothing to do (case of CPU use pointer) 225*5a9ca9adSVeselin Dobrev } 226c119dad6SStan Tomov 227c119dad6SStan Tomov } else if (impl->down_) { 228c119dad6SStan Tomov // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 229c119dad6SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 230c119dad6SStan Tomov impl->darray, 1, impl->array, 1); 231c119dad6SStan Tomov } 232e2900954SStan Tomov 23382b77998SStan Tomov *array = NULL; 23482b77998SStan Tomov return 0; 23582b77998SStan Tomov } 23682b77998SStan Tomov 23793fbe329SStan Tomov // ***************************************************************************** 238e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST 239e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if 240e2900954SStan Tomov // * it was GPU memory we would not call this routine at all. 24193fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values 24293fbe329SStan Tomov // * from vec to only read them; in this case vec may have been modified meanwhile 24393fbe329SStan Tomov // * and needs to be restored here. 24493fbe329SStan Tomov // ***************************************************************************** 24582b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 24682b77998SStan Tomov const CeedScalar **array) { 24793fbe329SStan Tomov CeedVector_Magma *impl = vec->data; 24893fbe329SStan Tomov 249c119dad6SStan Tomov // Check if the array is a CPU pointer 250c119dad6SStan Tomov if (*array == impl->array) { 251c119dad6SStan Tomov // Update device, if the device pointer is not NULL 252*5a9ca9adSVeselin Dobrev if (impl->darray != NULL) { 25393fbe329SStan Tomov magma_setvector(vec->length, sizeof(*array[0]), 25493fbe329SStan Tomov *array, 1, impl->darray, 1); 255*5a9ca9adSVeselin Dobrev } else { 256*5a9ca9adSVeselin Dobrev // nothing to do (case of CPU use pointer) 257*5a9ca9adSVeselin Dobrev } 258c119dad6SStan Tomov 259c119dad6SStan Tomov } else if (impl->down_) { 260c119dad6SStan Tomov // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 261c119dad6SStan Tomov magma_getvector(vec->length, sizeof(*array[0]), 262c119dad6SStan Tomov impl->darray, 1, impl->array, 1); 263c119dad6SStan Tomov } 264e2900954SStan Tomov 26582b77998SStan Tomov *array = NULL; 26682b77998SStan Tomov return 0; 26782b77998SStan Tomov } 26882b77998SStan Tomov 26982b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) { 27082b77998SStan Tomov CeedVector_Magma *impl = vec->data; 27182b77998SStan Tomov int ierr; 27282b77998SStan Tomov 273e2900954SStan Tomov // Free if we own the data 274e2900954SStan Tomov if (impl->own_) { 275e2900954SStan Tomov ierr = magma_free_pinned(impl->array); CeedChk(ierr); 27693fbe329SStan Tomov ierr = magma_free(impl->darray); CeedChk(ierr); 277c119dad6SStan Tomov } else if (impl->down_) { 278c119dad6SStan Tomov ierr = magma_free(impl->darray); CeedChk(ierr); 279e2900954SStan Tomov } 28082b77998SStan Tomov ierr = CeedFree(&vec->data); CeedChk(ierr); 28182b77998SStan Tomov return 0; 28282b77998SStan Tomov } 28382b77998SStan Tomov 28493fbe329SStan Tomov // ***************************************************************************** 28593fbe329SStan Tomov // * Create vector vec of size n 28693fbe329SStan Tomov // ***************************************************************************** 28782b77998SStan Tomov static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) { 28882b77998SStan Tomov CeedVector_Magma *impl; 28982b77998SStan Tomov int ierr; 29082b77998SStan Tomov 29182b77998SStan Tomov vec->SetArray = CeedVectorSetArray_Magma; 29282b77998SStan Tomov vec->GetArray = CeedVectorGetArray_Magma; 29382b77998SStan Tomov vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 29482b77998SStan Tomov vec->RestoreArray = CeedVectorRestoreArray_Magma; 29582b77998SStan Tomov vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 29682b77998SStan Tomov vec->Destroy = CeedVectorDestroy_Magma; 29782b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 29893fbe329SStan Tomov impl->darray = NULL; 299e2900954SStan Tomov impl->array = NULL; 300e2900954SStan Tomov impl->own_ = 0; 301c119dad6SStan Tomov impl->down_= 0; 30282b77998SStan Tomov vec->data = impl; 30382b77998SStan Tomov return 0; 30482b77998SStan Tomov } 30582b77998SStan Tomov 30693fbe329SStan Tomov // ***************************************************************************** 30793fbe329SStan Tomov // * Apply restriction operator r to u: v = r(rmode) u 30893fbe329SStan Tomov // ***************************************************************************** 30982b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 31082b77998SStan Tomov CeedTransposeMode tmode, CeedInt ncomp, 31182b77998SStan Tomov CeedTransposeMode lmode, CeedVector u, 31282b77998SStan Tomov CeedVector v, CeedRequest *request) { 31382b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 31482b77998SStan Tomov int ierr; 31582b77998SStan Tomov const CeedScalar *uu; 31682b77998SStan Tomov CeedScalar *vv; 317c119dad6SStan Tomov CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof; 318c119dad6SStan Tomov CeedInt esize = nelem * elemsize; 319137a0714SStan Tomov // CeedInt *indices = impl->indices; 320137a0714SStan Tomov CeedInt *dindices = impl->dindices; 32182b77998SStan Tomov 322c119dad6SStan Tomov //ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 323c119dad6SStan Tomov //ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 324c119dad6SStan Tomov 325c119dad6SStan Tomov // Get pointers on the device 326c119dad6SStan Tomov ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr); 327c119dad6SStan Tomov ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr); 328c119dad6SStan Tomov 32982b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 33082b77998SStan Tomov // Perform: v = r * u 33182b77998SStan Tomov if (ncomp == 1) { 332c119dad6SStan Tomov //for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]]; 333e2900954SStan Tomov 334c119dad6SStan Tomov magma_template<<i=0:esize>> 335c119dad6SStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 336c119dad6SStan Tomov vv[i] = uu[dindices[i]]; 337e2900954SStan Tomov } 33882b77998SStan Tomov } else { 33982b77998SStan Tomov // vv is (elemsize x ncomp x nelem), column-major 34082b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 341c119dad6SStan Tomov /* 342c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 34382b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 344c119dad6SStan Tomov for (CeedInt i=0; i < elemsize; i++) { 345c119dad6SStan Tomov vv[i + elemsize*(d+ncomp*e)] = 346c119dad6SStan Tomov uu[indices[i+elemsize*e]+ndof*d]; 347c119dad6SStan Tomov } 348c119dad6SStan Tomov */ 349c119dad6SStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 350c119dad6SStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) { 351c119dad6SStan Tomov vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d]; 35282b77998SStan Tomov } 35382b77998SStan Tomov } else { // u is (ncomp x ndof), column-major 354c119dad6SStan Tomov /* 355c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 35682b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 357c119dad6SStan Tomov for (CeedInt i=0; i< elemsize; i++) { 358c119dad6SStan Tomov vv[i + elemsize*(d+ncomp*e)] = 359c119dad6SStan Tomov uu[d+ncomp*indices[i+elemsize*e]]; 360c119dad6SStan Tomov } 361c119dad6SStan Tomov */ 362c119dad6SStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 363c119dad6SStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 364c119dad6SStan Tomov vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]]; 36582b77998SStan Tomov } 36682b77998SStan Tomov } 36782b77998SStan Tomov } 36882b77998SStan Tomov } else { 36982b77998SStan Tomov // Note: in transpose mode, we perform: v += r^t * u 37082b77998SStan Tomov if (ncomp == 1) { 371c119dad6SStan Tomov // fprintf(stderr,"3 ---------\n"); 372c119dad6SStan Tomov // for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i]; 373c119dad6SStan Tomov magma_template<<i=0:esize>> 374c119dad6SStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 375c119dad6SStan Tomov magmablas_datomic_add( &vv[dindices[i]], uu[i]); 376c119dad6SStan Tomov } 377c119dad6SStan Tomov } else { // u is (elemsize x ncomp x nelem) 378c119dad6SStan Tomov fprintf(stderr,"2 ---------\n"); 379c119dad6SStan Tomov 38082b77998SStan Tomov if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 381c119dad6SStan Tomov /* 382c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 38382b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 384c119dad6SStan Tomov for (CeedInt i=0; i < elemsize; i++) { 385c119dad6SStan Tomov vv[indices[i + elemsize*e]+ndof*d] += 386c119dad6SStan Tomov uu[i + elemsize*(d+e*ncomp)]; 387c119dad6SStan Tomov } 388c119dad6SStan Tomov */ 389c119dad6SStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 390c119dad6SStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) { 391c119dad6SStan Tomov magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d], 392c119dad6SStan Tomov uu[i+iend*(d+e*dend)]); 39382b77998SStan Tomov } 39482b77998SStan Tomov } else { // vv is (ncomp x ndof), column-major 395c119dad6SStan Tomov /* 396c119dad6SStan Tomov for (CeedInt e = 0; e < nelem; e++) 39782b77998SStan Tomov for (CeedInt d = 0; d < ncomp; d++) 398c119dad6SStan Tomov for (CeedInt i=0; i < elemsize; i++) { 399c119dad6SStan Tomov vv[d+ncomp*indices[i + elemsize*e]] += 400c119dad6SStan Tomov uu[i + elemsize*(d+e*ncomp)]; 401c119dad6SStan Tomov } 402c119dad6SStan Tomov */ 403c119dad6SStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 404c119dad6SStan Tomov (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 405c119dad6SStan Tomov magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]], 406c119dad6SStan Tomov uu[i+iend*(d+e*dend)]); 40782b77998SStan Tomov } 40882b77998SStan Tomov } 40982b77998SStan Tomov } 41082b77998SStan Tomov } 41182b77998SStan Tomov ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 41282b77998SStan Tomov ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 413c119dad6SStan Tomov 41482b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 41582b77998SStan Tomov *request = NULL; 41682b77998SStan Tomov return 0; 41782b77998SStan Tomov } 41882b77998SStan Tomov 41982b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 42082b77998SStan Tomov CeedElemRestriction_Magma *impl = r->data; 42182b77998SStan Tomov int ierr; 42282b77998SStan Tomov 4231751b0a9SStan Tomov // Free if we own the data 4241751b0a9SStan Tomov if (impl->own_) { 4251751b0a9SStan Tomov ierr = magma_free_pinned(impl->indices); CeedChk(ierr); 4261751b0a9SStan Tomov ierr = magma_free(impl->dindices); CeedChk(ierr); 4271981b6e4STzanio } else if (impl->down_) { 428c119dad6SStan Tomov ierr = magma_free(impl->dindices); CeedChk(ierr); 429c119dad6SStan Tomov } 43082b77998SStan Tomov ierr = CeedFree(&r->data); CeedChk(ierr); 43182b77998SStan Tomov return 0; 43282b77998SStan Tomov } 43382b77998SStan Tomov 43482b77998SStan Tomov static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r, 43582b77998SStan Tomov CeedMemType mtype, 43693fbe329SStan Tomov CeedCopyMode cmode, 43793fbe329SStan Tomov const CeedInt *indices) { 4381751b0a9SStan Tomov int ierr, size = r->nelem*r->elemsize; 43982b77998SStan Tomov CeedElemRestriction_Magma *impl; 44082b77998SStan Tomov 4411751b0a9SStan Tomov // Allocate memory for the MAGMA Restricton and initializa pointers to NULL 44282b77998SStan Tomov ierr = CeedCalloc(1,&impl); CeedChk(ierr); 4431751b0a9SStan Tomov impl->dindices = NULL; 4441751b0a9SStan Tomov impl->indices = NULL; 4451751b0a9SStan Tomov impl->own_ = 0; 446c119dad6SStan Tomov impl->down_= 0; 4471751b0a9SStan Tomov 4481751b0a9SStan Tomov if (mtype == CEED_MEM_HOST) { 4491751b0a9SStan Tomov // memory is on the host; own_ = 0 45082b77998SStan Tomov switch (cmode) { 45182b77998SStan Tomov case CEED_COPY_VALUES: 4521751b0a9SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 4531751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 4541751b0a9SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->indices, 4551751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 4561751b0a9SStan Tomov impl->own_ = 1; 4571751b0a9SStan Tomov 458c119dad6SStan Tomov if (indices != NULL) 4591751b0a9SStan Tomov magma_setvector(size, sizeof(CeedInt), 4601751b0a9SStan Tomov indices, 1, impl->dindices, 1); 46182b77998SStan Tomov break; 46282b77998SStan Tomov case CEED_OWN_POINTER: 4631751b0a9SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 4641751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 4651751b0a9SStan Tomov // TODO: possible problem here is if we are passed non-pinned memory; 4661751b0a9SStan Tomov // (as we own it, lter in destroy, we use free for pinned memory). 467137a0714SStan Tomov impl->indices = (CeedInt *)indices; 4681751b0a9SStan Tomov impl->own_ = 1; 4691751b0a9SStan Tomov 470c119dad6SStan Tomov if (indices != NULL) 4711751b0a9SStan Tomov magma_setvector(size, sizeof(CeedInt), 4721751b0a9SStan Tomov indices, 1, impl->dindices, 1); 47382b77998SStan Tomov break; 47482b77998SStan Tomov case CEED_USE_POINTER: 475c119dad6SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 476c119dad6SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 477c119dad6SStan Tomov magma_setvector(size, sizeof(CeedInt), 478c119dad6SStan Tomov indices, 1, impl->dindices, 1); 479c119dad6SStan Tomov impl->down_ = 1; 480137a0714SStan Tomov impl->indices = (CeedInt *)indices; 48182b77998SStan Tomov } 4821751b0a9SStan Tomov } else if (mtype == CEED_MEM_DEVICE) { 4831751b0a9SStan Tomov // memory is on the device; own = 0 4841751b0a9SStan Tomov switch (cmode) { 4851751b0a9SStan Tomov case CEED_COPY_VALUES: 4861751b0a9SStan Tomov ierr = magma_malloc( (void**)&impl->dindices, 4871751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 4881751b0a9SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->indices, 4891751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 4901751b0a9SStan Tomov impl->own_ = 1; 4911751b0a9SStan Tomov 4921751b0a9SStan Tomov if (indices) 4931751b0a9SStan Tomov magma_copyvector(size, sizeof(CeedInt), 4941751b0a9SStan Tomov indices, 1, impl->dindices, 1); 4951751b0a9SStan Tomov break; 4961751b0a9SStan Tomov case CEED_OWN_POINTER: 497137a0714SStan Tomov impl->dindices = (CeedInt *)indices; 4981751b0a9SStan Tomov ierr = magma_malloc_pinned( (void**)&impl->indices, 4991751b0a9SStan Tomov size * sizeof(CeedInt)); CeedChk(ierr); 5001751b0a9SStan Tomov impl->own_ = 1; 5011751b0a9SStan Tomov 5021751b0a9SStan Tomov break; 5031751b0a9SStan Tomov case CEED_USE_POINTER: 504137a0714SStan Tomov impl->dindices = (CeedInt *)indices; 5051751b0a9SStan Tomov impl->indices = NULL; 5061751b0a9SStan Tomov } 5071751b0a9SStan Tomov 5081751b0a9SStan Tomov } else 5091751b0a9SStan Tomov return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported"); 5101751b0a9SStan Tomov 51182b77998SStan Tomov r->data = impl; 51282b77998SStan Tomov r->Apply = CeedElemRestrictionApply_Magma; 51382b77998SStan Tomov r->Destroy = CeedElemRestrictionDestroy_Magma; 5141751b0a9SStan Tomov 51582b77998SStan Tomov return 0; 51682b77998SStan Tomov } 51782b77998SStan Tomov 51882b77998SStan Tomov // Contracts on the middle index 51982b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc 52082b77998SStan Tomov // TRANSPOSE: V_ajc = T_bj U_abc 52182b77998SStan Tomov // If Add != 0, "=" is replaced by "+=" 52282b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed, 52382b77998SStan Tomov CeedInt A, CeedInt B, CeedInt C, CeedInt J, 52482b77998SStan Tomov const CeedScalar *t, CeedTransposeMode tmode, 52582b77998SStan Tomov const CeedInt Add, 52682b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 52738612b08SStan Tomov #ifdef USE_MAGMA_BATCH 52838612b08SStan Tomov magma_dtensor_contract(ceed, A, B, C, J, t, tmode, Add, u, v); 52938612b08SStan Tomov #else 53082b77998SStan Tomov CeedInt tstride0 = B, tstride1 = 1; 53182b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 53282b77998SStan Tomov tstride0 = 1; tstride1 = J; 53382b77998SStan Tomov } 53438612b08SStan Tomov CeedDebug("\033[31m[CeedTensorContract] A=%d, J=%d, C=%d, B=%d: %d %d %d", 53538612b08SStan Tomov A,J,C,B,A*J*B*C, C*J*A, C*B*A); 53682b77998SStan Tomov for (CeedInt a=0; a<A; a++) { 53782b77998SStan Tomov for (CeedInt j=0; j<J; j++) { 53882b77998SStan Tomov if (!Add) { 53982b77998SStan Tomov for (CeedInt c=0; c<C; c++) 54082b77998SStan Tomov v[(a*J+j)*C+c] = 0; 54182b77998SStan Tomov } 54282b77998SStan Tomov for (CeedInt b=0; b<B; b++) { 54382b77998SStan Tomov for (CeedInt c=0; c<C; c++) { 54482b77998SStan Tomov v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 54582b77998SStan Tomov } 54682b77998SStan Tomov } 54782b77998SStan Tomov } 54882b77998SStan Tomov } 54938612b08SStan Tomov #endif 55082b77998SStan Tomov return 0; 55182b77998SStan Tomov } 55282b77998SStan Tomov 55382b77998SStan Tomov static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode, 55482b77998SStan Tomov CeedEvalMode emode, 55582b77998SStan Tomov const CeedScalar *u, CeedScalar *v) { 55682b77998SStan Tomov int ierr; 55782b77998SStan Tomov const CeedInt dim = basis->dim; 55882b77998SStan Tomov const CeedInt ndof = basis->ndof; 55982b77998SStan Tomov const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 56082b77998SStan Tomov const CeedInt add = (tmode == CEED_TRANSPOSE); 56182b77998SStan Tomov 56238612b08SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d", 56338612b08SStan Tomov ndof*CeedPowInt(basis->P1d, dim)); 56438612b08SStan Tomov 56582b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 56682b77998SStan Tomov const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 56782b77998SStan Tomov for (CeedInt i = 0; i < vsize; i++) 56882b77998SStan Tomov v[i] = (CeedScalar) 0; 56982b77998SStan Tomov } 57082b77998SStan Tomov if (emode & CEED_EVAL_INTERP) { 57182b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 57282b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 57382b77998SStan Tomov P = basis->Q1d; Q = basis->P1d; 57482b77998SStan Tomov } 57582b77998SStan Tomov CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 57682b77998SStan Tomov CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 57738612b08SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 57838612b08SStan Tomov ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)); 57982b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 58082b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 58182b77998SStan Tomov tmode, add&&(d==dim-1), 58282b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 58382b77998SStan Tomov CeedChk(ierr); 58482b77998SStan Tomov pre /= P; 58582b77998SStan Tomov post *= Q; 58682b77998SStan Tomov } 58782b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 58882b77998SStan Tomov v += nqpt; 58982b77998SStan Tomov } else { 59082b77998SStan Tomov u += nqpt; 59182b77998SStan Tomov } 59282b77998SStan Tomov } 59382b77998SStan Tomov if (emode & CEED_EVAL_GRAD) { 59482b77998SStan Tomov CeedInt P = basis->P1d, Q = basis->Q1d; 59582b77998SStan Tomov // In CEED_NOTRANSPOSE mode: 59682b77998SStan Tomov // u is (P^dim x nc), column-major layout (nc = ndof) 59782b77998SStan Tomov // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 59882b77998SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 59982b77998SStan Tomov if (tmode == CEED_TRANSPOSE) { 60082b77998SStan Tomov P = basis->Q1d, Q = basis->P1d; 60182b77998SStan Tomov } 60282b77998SStan Tomov CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 60338612b08SStan Tomov CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 60438612b08SStan Tomov ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)); 60582b77998SStan Tomov for (CeedInt p = 0; p < dim; p++) { 60682b77998SStan Tomov CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 60782b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 60882b77998SStan Tomov ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 60982b77998SStan Tomov (p==d)?basis->grad1d:basis->interp1d, 61082b77998SStan Tomov tmode, add&&(d==dim-1), 61182b77998SStan Tomov d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 61282b77998SStan Tomov CeedChk(ierr); 61382b77998SStan Tomov pre /= P; 61482b77998SStan Tomov post *= Q; 61582b77998SStan Tomov } 61682b77998SStan Tomov if (tmode == CEED_NOTRANSPOSE) { 61782b77998SStan Tomov v += nqpt; 61882b77998SStan Tomov } else { 61982b77998SStan Tomov u += nqpt; 62082b77998SStan Tomov } 62182b77998SStan Tomov } 62282b77998SStan Tomov } 62382b77998SStan Tomov if (emode & CEED_EVAL_WEIGHT) { 62482b77998SStan Tomov if (tmode == CEED_TRANSPOSE) 62582b77998SStan Tomov return CeedError(basis->ceed, 1, 62682b77998SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 62782b77998SStan Tomov CeedInt Q = basis->Q1d; 62882b77998SStan Tomov for (CeedInt d=0; d<dim; d++) { 62982b77998SStan Tomov CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 63082b77998SStan Tomov for (CeedInt i=0; i<pre; i++) { 63182b77998SStan Tomov for (CeedInt j=0; j<Q; j++) { 63282b77998SStan Tomov for (CeedInt k=0; k<post; k++) { 63382b77998SStan Tomov v[(i*Q + j)*post + k] = basis->qweight1d[j] 63482b77998SStan Tomov * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 63582b77998SStan Tomov } 63682b77998SStan Tomov } 63782b77998SStan Tomov } 63882b77998SStan Tomov } 63982b77998SStan Tomov } 64082b77998SStan Tomov return 0; 64182b77998SStan Tomov } 64282b77998SStan Tomov 64382b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) { 64482b77998SStan Tomov return 0; 64582b77998SStan Tomov } 64682b77998SStan Tomov 64782b77998SStan Tomov static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d, 64882b77998SStan Tomov CeedInt Q1d, const CeedScalar *interp1d, 64982b77998SStan Tomov const CeedScalar *grad1d, 65082b77998SStan Tomov const CeedScalar *qref1d, 65182b77998SStan Tomov const CeedScalar *qweight1d, 65282b77998SStan Tomov CeedBasis basis) { 65382b77998SStan Tomov basis->Apply = CeedBasisApply_Magma; 65482b77998SStan Tomov basis->Destroy = CeedBasisDestroy_Magma; 65582b77998SStan Tomov return 0; 65682b77998SStan Tomov } 65782b77998SStan Tomov 65882b77998SStan Tomov static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q, 65982b77998SStan Tomov const CeedScalar *const *u, 66082b77998SStan Tomov CeedScalar *const *v) { 66182b77998SStan Tomov int ierr; 66282b77998SStan Tomov ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 66382b77998SStan Tomov return 0; 66482b77998SStan Tomov } 66582b77998SStan Tomov 66682b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 66782b77998SStan Tomov return 0; 66882b77998SStan Tomov } 66982b77998SStan Tomov 67082b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 67182b77998SStan Tomov qf->Apply = CeedQFunctionApply_Magma; 67282b77998SStan Tomov qf->Destroy = CeedQFunctionDestroy_Magma; 67382b77998SStan Tomov return 0; 67482b77998SStan Tomov } 67582b77998SStan Tomov 67682b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) { 67782b77998SStan Tomov CeedOperator_Magma *impl = op->data; 67882b77998SStan Tomov int ierr; 67982b77998SStan Tomov 68082b77998SStan Tomov ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 68182b77998SStan Tomov ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 68282b77998SStan Tomov ierr = CeedFree(&op->data); CeedChk(ierr); 68382b77998SStan Tomov return 0; 68482b77998SStan Tomov } 68582b77998SStan Tomov 68682b77998SStan Tomov static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata, 68782b77998SStan Tomov CeedVector ustate, 68882b77998SStan Tomov CeedVector residual, CeedRequest *request) { 68982b77998SStan Tomov CeedOperator_Magma *impl = op->data; 69082b77998SStan Tomov CeedVector etmp; 69182b77998SStan Tomov CeedInt Q; 69282b77998SStan Tomov const CeedInt nc = op->basis->ndof, dim = op->basis->dim; 69382b77998SStan Tomov CeedScalar *Eu; 69482b77998SStan Tomov char *qd; 69582b77998SStan Tomov int ierr; 69682b77998SStan Tomov CeedTransposeMode lmode = CEED_NOTRANSPOSE; 69782b77998SStan Tomov 69882b77998SStan Tomov if (!impl->etmp) { 69982b77998SStan Tomov ierr = CeedVectorCreate(op->ceed, 70082b77998SStan Tomov nc * op->Erestrict->nelem * op->Erestrict->elemsize, 70182b77998SStan Tomov &impl->etmp); CeedChk(ierr); 70282b77998SStan Tomov // etmp is allocated when CeedVectorGetArray is called below 70382b77998SStan Tomov } 70482b77998SStan Tomov etmp = impl->etmp; 70582b77998SStan Tomov if (op->qf->inmode & ~CEED_EVAL_WEIGHT) { 70682b77998SStan Tomov ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, 70782b77998SStan Tomov nc, lmode, ustate, etmp, 70882b77998SStan Tomov CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 70982b77998SStan Tomov } 71082b77998SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 71182b77998SStan Tomov ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 71282b77998SStan Tomov ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); 71382b77998SStan Tomov CeedChk(ierr); 71438612b08SStan Tomov 71582b77998SStan Tomov for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 71682b77998SStan Tomov CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0}; 71782b77998SStan Tomov const CeedScalar *in[5] = {0,0,0,0,0}; 71882b77998SStan Tomov // TODO: quadrature weights can be computed just once 71938612b08SStan Tomov CeedDebug("\033[11m[CeedOperatorApply_Magma] e=%d: Eu+%d, %d", 72038612b08SStan Tomov e, e*op->Erestrict->elemsize*nc, Q*nc*(dim+2)); 72182b77998SStan Tomov ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 72282b77998SStan Tomov &Eu[e*op->Erestrict->elemsize*nc], BEu); 72382b77998SStan Tomov CeedChk(ierr); 72482b77998SStan Tomov CeedScalar *u_ptr = BEu, *v_ptr = BEv; 72582b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; } 72682b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; } 72782b77998SStan Tomov if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; } 72882b77998SStan Tomov if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; } 72982b77998SStan Tomov if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; } 73082b77998SStan Tomov ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 73182b77998SStan Tomov CeedChk(ierr); 73238612b08SStan Tomov CeedDebug("\033[31m[CeedOperatorApply_Magma] e=%d: ",e); 73382b77998SStan Tomov ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 73482b77998SStan Tomov &Eu[e*op->Erestrict->elemsize*nc]); 73582b77998SStan Tomov CeedChk(ierr); 73682b77998SStan Tomov } 73782b77998SStan Tomov ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 73882b77998SStan Tomov if (residual) { 73982b77998SStan Tomov CeedScalar *res; 74082b77998SStan Tomov CeedVectorGetArray(residual, CEED_MEM_HOST, &res); 74182b77998SStan Tomov for (int i = 0; i < residual->length; i++) 74282b77998SStan Tomov res[i] = (CeedScalar)0; 74382b77998SStan Tomov ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, 74482b77998SStan Tomov nc, lmode, etmp, residual, 74582b77998SStan Tomov CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 74682b77998SStan Tomov } 74782b77998SStan Tomov if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 74882b77998SStan Tomov *request = NULL; 74982b77998SStan Tomov return 0; 75082b77998SStan Tomov } 75182b77998SStan Tomov 75282b77998SStan Tomov static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) { 75382b77998SStan Tomov CeedOperator_Magma *impl = op->data; 75482b77998SStan Tomov int ierr; 75582b77998SStan Tomov 75682b77998SStan Tomov if (!impl->qdata) { 75782b77998SStan Tomov CeedInt Q; 75882b77998SStan Tomov ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 75982b77998SStan Tomov ierr = CeedVectorCreate(op->ceed, 76082b77998SStan Tomov op->Erestrict->nelem * Q 76182b77998SStan Tomov * op->qf->qdatasize / sizeof(CeedScalar), 76282b77998SStan Tomov &impl->qdata); CeedChk(ierr); 76382b77998SStan Tomov } 76482b77998SStan Tomov *qdata = impl->qdata; 76582b77998SStan Tomov return 0; 76682b77998SStan Tomov } 76782b77998SStan Tomov 76882b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) { 76982b77998SStan Tomov CeedOperator_Magma *impl; 77082b77998SStan Tomov int ierr; 77182b77998SStan Tomov 77282b77998SStan Tomov ierr = CeedCalloc(1, &impl); CeedChk(ierr); 77382b77998SStan Tomov op->data = impl; 77482b77998SStan Tomov op->Destroy = CeedOperatorDestroy_Magma; 77582b77998SStan Tomov op->Apply = CeedOperatorApply_Magma; 77682b77998SStan Tomov op->GetQData = CeedOperatorGetQData_Magma; 77782b77998SStan Tomov return 0; 77882b77998SStan Tomov } 77982b77998SStan Tomov 78082b77998SStan Tomov // ***************************************************************************** 78182b77998SStan Tomov // * INIT 78282b77998SStan Tomov // ***************************************************************************** 78382b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) { 78482b77998SStan Tomov if (strcmp(resource, "/cpu/magma") 78582b77998SStan Tomov && strcmp(resource, "/gpu/magma")) 78682b77998SStan Tomov return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 78793fbe329SStan Tomov 78893fbe329SStan Tomov magma_init(); 78993fbe329SStan Tomov //magma_print_environment(); 79093fbe329SStan Tomov 79182b77998SStan Tomov ceed->VecCreate = CeedVectorCreate_Magma; 79282b77998SStan Tomov ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 79382b77998SStan Tomov ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 79482b77998SStan Tomov ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 79582b77998SStan Tomov ceed->OperatorCreate = CeedOperatorCreate_Magma; 79682b77998SStan Tomov return 0; 79782b77998SStan Tomov } 79882b77998SStan Tomov 79982b77998SStan Tomov // ***************************************************************************** 80082b77998SStan Tomov // * REGISTER 80182b77998SStan Tomov // ***************************************************************************** 80282b77998SStan Tomov __attribute__((constructor)) 80382b77998SStan Tomov static void Register(void) { 80482b77998SStan Tomov CeedRegister("/cpu/magma", CeedInit_Magma); 80582b77998SStan Tomov CeedRegister("/gpu/magma", CeedInit_Magma); 80682b77998SStan Tomov } 807