xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.c (revision 44951fc6642471e2b443ede8ee5bcff8115d816d)
182b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
282b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
382b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details.
482b77998SStan Tomov //
582b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software
682b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral
782b77998SStan Tomov // element discretizations for exascale applications. For more information and
882b77998SStan Tomov // source code availability see http://github.com/ceed.
982b77998SStan Tomov //
1082b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1182b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office
1282b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for
1382b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including
1482b77998SStan Tomov // software, applications, hardware, advanced system engineering and early
1582b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative.
1682b77998SStan Tomov 
1738612b08SStan Tomov #include "ceed-magma.h"
185a9ca9adSVeselin Dobrev #include <string.h>
1982b77998SStan Tomov 
2082b77998SStan Tomov typedef struct {
2182b77998SStan Tomov   CeedScalar *array;
2293fbe329SStan Tomov   CeedScalar *darray;
23e2900954SStan Tomov   int  own_;
24c119dad6SStan Tomov   int down_;
2582b77998SStan Tomov } CeedVector_Magma;
2682b77998SStan Tomov 
2782b77998SStan Tomov typedef struct {
281751b0a9SStan Tomov   CeedInt *indices;
291751b0a9SStan Tomov   CeedInt *dindices;
301751b0a9SStan Tomov   int  own_;
31c119dad6SStan Tomov   int down_;            // cover a case where we own Device memory
3282b77998SStan Tomov } CeedElemRestriction_Magma;
3382b77998SStan Tomov 
3482b77998SStan Tomov typedef struct {
3582b77998SStan Tomov   CeedVector etmp;
3682b77998SStan Tomov   CeedVector qdata;
3782b77998SStan Tomov } CeedOperator_Magma;
3882b77998SStan Tomov 
3993fbe329SStan Tomov // *****************************************************************************
4093fbe329SStan Tomov // * Initialize vector vec (after free mem) with values from array based on cmode
4193fbe329SStan Tomov // *   CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal
4293fbe329SStan Tomov // *                     to array, and data is copied (not store passed pointer)
4393fbe329SStan Tomov // *   CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array
4493fbe329SStan Tomov // *   CEED_USE_POINTER: vec->data->array = array (can modify; no ownership)
4593fbe329SStan Tomov // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE
4693fbe329SStan Tomov // *****************************************************************************
4782b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype,
4882b77998SStan Tomov                                     CeedCopyMode cmode, CeedScalar *array) {
4982b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
5082b77998SStan Tomov   int ierr;
5182b77998SStan Tomov 
52c119dad6SStan Tomov   // If own data, free the "old" data, e.g., as it may be of different size
53e2900954SStan Tomov   if (impl->own_) {
54e2900954SStan Tomov     magma_free( impl->darray );
55e2900954SStan Tomov     magma_free_pinned( impl->array );
56e2900954SStan Tomov     impl->darray = NULL;
57e2900954SStan Tomov     impl->array  = NULL;
58e2900954SStan Tomov     impl->own_ = 0;
59c119dad6SStan Tomov     impl->down_= 0;
60e2900954SStan Tomov   }
6193fbe329SStan Tomov 
62e2900954SStan Tomov   if (mtype == CEED_MEM_HOST) {
63e2900954SStan Tomov     // memory is on the host; own_ = 0
6482b77998SStan Tomov     switch (cmode) {
6582b77998SStan Tomov     case CEED_COPY_VALUES:
6693fbe329SStan Tomov       ierr = magma_malloc( (void**)&impl->darray,
67e2900954SStan Tomov                            vec->length * sizeof(CeedScalar)); CeedChk(ierr);
68e2900954SStan Tomov       ierr = magma_malloc_pinned( (void**)&impl->array,
69e2900954SStan Tomov                                   vec->length * sizeof(CeedScalar)); CeedChk(ierr);
70e2900954SStan Tomov       impl->own_ = 1;
71e2900954SStan Tomov 
72c119dad6SStan Tomov       if (array != NULL)
7393fbe329SStan Tomov         magma_setvector(vec->length, sizeof(array[0]),
7493fbe329SStan Tomov                         array, 1, impl->darray, 1);
7582b77998SStan Tomov       break;
7682b77998SStan Tomov     case CEED_OWN_POINTER:
7793fbe329SStan Tomov       ierr = magma_malloc( (void**)&impl->darray,
78e2900954SStan Tomov                            vec->length * sizeof(CeedScalar)); CeedChk(ierr);
791751b0a9SStan Tomov       // TODO: possible problem here is if we are passed non-pinned memory;
801751b0a9SStan Tomov       //       (as we own it, lter in destroy, we use free for pinned memory).
81e2900954SStan Tomov       impl->array = array;
82e2900954SStan Tomov       impl->own_ = 1;
83e2900954SStan Tomov 
84c119dad6SStan Tomov       if (array != NULL)
8593fbe329SStan Tomov         magma_setvector(vec->length, sizeof(array[0]),
8693fbe329SStan Tomov                         array, 1, impl->darray, 1);
8782b77998SStan Tomov       break;
8882b77998SStan Tomov     case CEED_USE_POINTER:
89c119dad6SStan Tomov       ierr = magma_malloc( (void**)&impl->darray,
90c119dad6SStan Tomov                            vec->length * sizeof(CeedScalar)); CeedChk(ierr);
91c119dad6SStan Tomov       magma_setvector(vec->length, sizeof(array[0]),
92c119dad6SStan Tomov                       array, 1, impl->darray, 1);
9306b636dbSStan Tomov 
9406b636dbSStan Tomov       impl->down_  = 1;
9582b77998SStan Tomov       impl->array  = array;
9682b77998SStan Tomov     }
97e2900954SStan Tomov   } else if (mtype == CEED_MEM_DEVICE) {
98e2900954SStan Tomov     // memory is on the device; own = 0
99e2900954SStan Tomov     switch (cmode) {
100e2900954SStan Tomov     case CEED_COPY_VALUES:
101e2900954SStan Tomov       ierr = magma_malloc( (void**)&impl->darray,
102e2900954SStan Tomov                            vec->length * sizeof(CeedScalar)); CeedChk(ierr);
103e2900954SStan Tomov       ierr = magma_malloc_pinned( (void**)&impl->array,
104e2900954SStan Tomov                                   vec->length * sizeof(CeedScalar)); CeedChk(ierr);
105e2900954SStan Tomov       impl->own_ = 1;
106e2900954SStan Tomov 
107e2900954SStan Tomov       if (array)
108e2900954SStan Tomov         magma_copyvector(vec->length, sizeof(array[0]),
109e2900954SStan Tomov                          array, 1, impl->darray, 1);
11006b636dbSStan Tomov       else
11106b636dbSStan Tomov         // t30 assumes allocation initializes with 0s
11206b636dbSStan Tomov         magma_setvector(vec->length, sizeof(array[0]),
11306b636dbSStan Tomov                         impl->array, 1, impl->darray, 1);
114e2900954SStan Tomov       break;
115e2900954SStan Tomov     case CEED_OWN_POINTER:
116e2900954SStan Tomov       impl->darray = array;
117e2900954SStan Tomov       ierr = magma_malloc_pinned( (void**)&impl->array,
118e2900954SStan Tomov                                   vec->length * sizeof(CeedScalar)); CeedChk(ierr);
119e2900954SStan Tomov       impl->own_ = 1;
120e2900954SStan Tomov 
121e2900954SStan Tomov       break;
122e2900954SStan Tomov     case CEED_USE_POINTER:
123e2900954SStan Tomov       impl->darray = array;
124e2900954SStan Tomov       impl->array  = NULL;
125e2900954SStan Tomov     }
126e2900954SStan Tomov 
127e2900954SStan Tomov   } else
128e2900954SStan Tomov     return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported");
129e2900954SStan Tomov 
13082b77998SStan Tomov   return 0;
13182b77998SStan Tomov }
13282b77998SStan Tomov 
13393fbe329SStan Tomov // *****************************************************************************
134e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE)
13593fbe329SStan Tomov // *****************************************************************************
13682b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype,
13782b77998SStan Tomov                                     CeedScalar **array) {
13882b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
13982b77998SStan Tomov   int ierr;
14082b77998SStan Tomov 
141e2900954SStan Tomov   if (mtype == CEED_MEM_HOST) {
142c119dad6SStan Tomov     if (impl->own_) {
143e2900954SStan Tomov       // data is owned so GPU had the most up-to-date version; copy it
14406b636dbSStan Tomov       // TTT - apparantly it doesn't have most up to date data
145e2900954SStan Tomov       magma_getvector(vec->length, sizeof(*array[0]),
146e2900954SStan Tomov                       impl->darray, 1, impl->array, 1);
14706b636dbSStan Tomov       CeedDebug("\033[31m[CeedVectorGetArray_Magma]");
14806b636dbSStan Tomov       //fprintf(stderr,"rrrrrrrrrrrrrrr\n");
149c119dad6SStan Tomov     } else if (impl->array == NULL) {
150c119dad6SStan Tomov       // Vector doesn't own the data and was set on GPU
151c119dad6SStan Tomov       if (impl->darray == NULL) {
152c119dad6SStan Tomov         // call was made just to allocate memory
153c119dad6SStan Tomov         ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
154c119dad6SStan Tomov         CeedChk(ierr);
155c119dad6SStan Tomov       } else
156c119dad6SStan Tomov         return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST");
15782b77998SStan Tomov     }
15882b77998SStan Tomov     *array = impl->array;
159e2900954SStan Tomov   } else if (mtype == CEED_MEM_DEVICE) {
160c119dad6SStan Tomov     if (impl->darray == NULL) {
161c119dad6SStan Tomov       // Vector doesn't own the data and was set on the CPU
162c119dad6SStan Tomov       if (impl->array == NULL) {
163c119dad6SStan Tomov         // call was made just to allocate memory
164e2900954SStan Tomov         ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
165e2900954SStan Tomov         CeedChk(ierr);
166c119dad6SStan Tomov       } else
167c119dad6SStan Tomov         return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE");
168e2900954SStan Tomov     }
169e2900954SStan Tomov     *array = impl->darray;
170e2900954SStan Tomov   } else
171e2900954SStan Tomov     return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory");
172e2900954SStan Tomov 
17382b77998SStan Tomov   return 0;
17482b77998SStan Tomov }
17582b77998SStan Tomov 
17693fbe329SStan Tomov // *****************************************************************************
177e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it
17893fbe329SStan Tomov // *****************************************************************************
17982b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype,
18082b77998SStan Tomov                                         const CeedScalar **array) {
18182b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
18282b77998SStan Tomov   int ierr;
18382b77998SStan Tomov 
184e2900954SStan Tomov   if (mtype == CEED_MEM_HOST) {
185c119dad6SStan Tomov     if (impl->own_) {
186e2900954SStan Tomov       // data is owned so GPU had the most up-to-date version; copy it
187e2900954SStan Tomov       magma_getvector(vec->length, sizeof(*array[0]),
188e2900954SStan Tomov                       impl->darray, 1, impl->array, 1);
189c119dad6SStan Tomov     } else if (impl->array == NULL) {
190c119dad6SStan Tomov       // Vector doesn't own the data and was set on GPU
191c119dad6SStan Tomov       if (impl->darray == NULL) {
192c119dad6SStan Tomov         // call was made just to allocate memory
193c119dad6SStan Tomov         ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
194c119dad6SStan Tomov         CeedChk(ierr);
195c119dad6SStan Tomov       } else
196c119dad6SStan Tomov         return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST");
19782b77998SStan Tomov     }
19882b77998SStan Tomov     *array = impl->array;
199e2900954SStan Tomov   } else if (mtype == CEED_MEM_DEVICE) {
200c119dad6SStan Tomov     if (impl->darray == NULL) {
201c119dad6SStan Tomov       // Vector doesn't own the data and was set on the CPU
202c119dad6SStan Tomov       if (impl->array == NULL) {
203c119dad6SStan Tomov         // call was made just to allocate memory
204e2900954SStan Tomov         ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
205e2900954SStan Tomov         CeedChk(ierr);
206c119dad6SStan Tomov       } else
207c119dad6SStan Tomov         return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE");
208e2900954SStan Tomov     }
209e2900954SStan Tomov     *array = impl->darray;
210e2900954SStan Tomov   } else
211e2900954SStan Tomov     return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory");
212e2900954SStan Tomov 
21382b77998SStan Tomov   return 0;
21482b77998SStan Tomov }
21582b77998SStan Tomov 
21693fbe329SStan Tomov // *****************************************************************************
217e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST
218e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if
219e2900954SStan Tomov // * it was GPU memory we would not call this routine at all.
22093fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values
22193fbe329SStan Tomov // * from vec and possibly modified them.
22293fbe329SStan Tomov // *****************************************************************************
22382b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) {
22493fbe329SStan Tomov   CeedVector_Magma *impl = vec->data;
22593fbe329SStan Tomov 
226c119dad6SStan Tomov   // Check if the array is a CPU pointer
227c119dad6SStan Tomov   if (*array == impl->array) {
228c119dad6SStan Tomov     // Update device, if the device pointer is not NULL
229c119dad6SStan Tomov     if (impl->darray != NULL) {
23093fbe329SStan Tomov       magma_setvector(vec->length, sizeof(*array[0]),
23193fbe329SStan Tomov                       *array, 1, impl->darray, 1);
2325a9ca9adSVeselin Dobrev     } else {
2335a9ca9adSVeselin Dobrev       // nothing to do (case of CPU use pointer)
2345a9ca9adSVeselin Dobrev     }
235c119dad6SStan Tomov 
236c119dad6SStan Tomov   } else if (impl->down_) {
237c119dad6SStan Tomov     // nothing to do if array is on GPU, except if down_=1(case CPU use pointer)
238c119dad6SStan Tomov     magma_getvector(vec->length, sizeof(*array[0]),
239c119dad6SStan Tomov                     impl->darray, 1, impl->array, 1);
240c119dad6SStan Tomov   }
241e2900954SStan Tomov 
24282b77998SStan Tomov   *array = NULL;
24382b77998SStan Tomov   return 0;
24482b77998SStan Tomov }
24582b77998SStan Tomov 
24693fbe329SStan Tomov // *****************************************************************************
247e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST
248e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if
249e2900954SStan Tomov // * it was GPU memory we would not call this routine at all.
25093fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values
25193fbe329SStan Tomov // * from vec to only read them; in this case vec may have been modified meanwhile
25293fbe329SStan Tomov // * and needs to be restored here.
25393fbe329SStan Tomov // *****************************************************************************
25482b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec,
25582b77998SStan Tomov     const CeedScalar **array) {
25693fbe329SStan Tomov   CeedVector_Magma *impl = vec->data;
25793fbe329SStan Tomov 
258c119dad6SStan Tomov   // Check if the array is a CPU pointer
259c119dad6SStan Tomov   if (*array == impl->array) {
260c119dad6SStan Tomov     // Update device, if the device pointer is not NULL
2615a9ca9adSVeselin Dobrev     if (impl->darray != NULL) {
26293fbe329SStan Tomov       magma_setvector(vec->length, sizeof(*array[0]),
26393fbe329SStan Tomov                       *array, 1, impl->darray, 1);
2645a9ca9adSVeselin Dobrev     } else {
2655a9ca9adSVeselin Dobrev       // nothing to do (case of CPU use pointer)
2665a9ca9adSVeselin Dobrev     }
267c119dad6SStan Tomov 
268c119dad6SStan Tomov   } else if (impl->down_) {
269c119dad6SStan Tomov     // nothing to do if array is on GPU, except if down_=1(case CPU use pointer)
270c119dad6SStan Tomov     magma_getvector(vec->length, sizeof(*array[0]),
271c119dad6SStan Tomov                     impl->darray, 1, impl->array, 1);
272c119dad6SStan Tomov   }
273e2900954SStan Tomov 
27482b77998SStan Tomov   *array = NULL;
27582b77998SStan Tomov   return 0;
27682b77998SStan Tomov }
27782b77998SStan Tomov 
27882b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) {
27982b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
28082b77998SStan Tomov   int ierr;
28182b77998SStan Tomov 
282e2900954SStan Tomov   // Free if we own the data
283e2900954SStan Tomov   if (impl->own_) {
284e2900954SStan Tomov     ierr = magma_free_pinned(impl->array); CeedChk(ierr);
28593fbe329SStan Tomov     ierr = magma_free(impl->darray);       CeedChk(ierr);
286c119dad6SStan Tomov   } else if (impl->down_) {
287c119dad6SStan Tomov     ierr = magma_free(impl->darray);       CeedChk(ierr);
288e2900954SStan Tomov   }
28982b77998SStan Tomov   ierr = CeedFree(&vec->data); CeedChk(ierr);
29082b77998SStan Tomov   return 0;
29182b77998SStan Tomov }
29282b77998SStan Tomov 
29393fbe329SStan Tomov // *****************************************************************************
29493fbe329SStan Tomov // * Create vector vec of size n
29593fbe329SStan Tomov // *****************************************************************************
29682b77998SStan Tomov static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) {
29782b77998SStan Tomov   CeedVector_Magma *impl;
29882b77998SStan Tomov   int ierr;
29982b77998SStan Tomov 
30082b77998SStan Tomov   vec->SetArray = CeedVectorSetArray_Magma;
30182b77998SStan Tomov   vec->GetArray = CeedVectorGetArray_Magma;
30282b77998SStan Tomov   vec->GetArrayRead = CeedVectorGetArrayRead_Magma;
30382b77998SStan Tomov   vec->RestoreArray = CeedVectorRestoreArray_Magma;
30482b77998SStan Tomov   vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma;
30582b77998SStan Tomov   vec->Destroy = CeedVectorDestroy_Magma;
30682b77998SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
30793fbe329SStan Tomov   impl->darray = NULL;
308e2900954SStan Tomov   impl->array  = NULL;
309e2900954SStan Tomov   impl->own_ = 0;
310c119dad6SStan Tomov   impl->down_= 0;
31182b77998SStan Tomov   vec->data = impl;
31282b77998SStan Tomov   return 0;
31382b77998SStan Tomov }
31482b77998SStan Tomov 
31593fbe329SStan Tomov // *****************************************************************************
31693fbe329SStan Tomov // * Apply restriction operator r to u: v = r(rmode) u
31793fbe329SStan Tomov // *****************************************************************************
31882b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r,
31982b77998SStan Tomov     CeedTransposeMode tmode, CeedInt ncomp,
32082b77998SStan Tomov     CeedTransposeMode lmode, CeedVector u,
32182b77998SStan Tomov     CeedVector v, CeedRequest *request) {
32282b77998SStan Tomov   CeedElemRestriction_Magma *impl = r->data;
32382b77998SStan Tomov   int ierr;
32482b77998SStan Tomov   const CeedScalar *uu;
32582b77998SStan Tomov   CeedScalar *vv;
326c119dad6SStan Tomov   CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof;
327c119dad6SStan Tomov   CeedInt esize = nelem * elemsize;
32806b636dbSStan Tomov 
32906b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
330137a0714SStan Tomov   CeedInt *dindices = impl->dindices;
331c119dad6SStan Tomov   // Get pointers on the device
332c119dad6SStan Tomov   ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr);
333c119dad6SStan Tomov   ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr);
33406b636dbSStan Tomov #else
33506b636dbSStan Tomov   CeedInt *indices = impl->indices;
33606b636dbSStan Tomov   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
33706b636dbSStan Tomov   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
33806b636dbSStan Tomov #endif
339c119dad6SStan Tomov 
34082b77998SStan Tomov   if (tmode == CEED_NOTRANSPOSE) {
34182b77998SStan Tomov     // Perform: v = r * u
34282b77998SStan Tomov     if (ncomp == 1) {
34306b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
344c119dad6SStan Tomov magma_template<<i=0:esize>>
345c119dad6SStan Tomov       (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
346c119dad6SStan Tomov         vv[i] = uu[dindices[i]];
347e2900954SStan Tomov       }
34806b636dbSStan Tomov #else
34906b636dbSStan Tomov       for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]];
35006b636dbSStan Tomov #endif
35182b77998SStan Tomov     } else {
35282b77998SStan Tomov       // vv is (elemsize x ncomp x nelem), column-major
35382b77998SStan Tomov       if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
35406b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
35506b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
35606b636dbSStan Tomov         (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) {
35706b636dbSStan Tomov           vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d];
35806b636dbSStan Tomov         }
35906b636dbSStan Tomov #else
360c119dad6SStan Tomov         for (CeedInt e = 0; e < nelem; e++)
36182b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
362c119dad6SStan Tomov             for (CeedInt i=0; i < elemsize; i++) {
363c119dad6SStan Tomov               vv[i + elemsize*(d+ncomp*e)] =
364c119dad6SStan Tomov                 uu[indices[i+elemsize*e]+ndof*d];
365c119dad6SStan Tomov             }
36606b636dbSStan Tomov #endif
36782b77998SStan Tomov       } else { // u is (ncomp x ndof), column-major
36806b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
36906b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
37006b636dbSStan Tomov         (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
37106b636dbSStan Tomov           vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]];
37206b636dbSStan Tomov         }
37306b636dbSStan Tomov #else
374c119dad6SStan Tomov         for (CeedInt e = 0; e < nelem; e++)
37582b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
376c119dad6SStan Tomov             for (CeedInt i=0; i< elemsize; i++) {
377c119dad6SStan Tomov               vv[i + elemsize*(d+ncomp*e)] =
378c119dad6SStan Tomov                 uu[d+ncomp*indices[i+elemsize*e]];
379c119dad6SStan Tomov             }
38006b636dbSStan Tomov #endif
38182b77998SStan Tomov       }
38282b77998SStan Tomov     }
38382b77998SStan Tomov   } else {
38482b77998SStan Tomov     // Note: in transpose mode, we perform: v += r^t * u
38582b77998SStan Tomov     if (ncomp == 1) {
386c119dad6SStan Tomov       // fprintf(stderr,"3 ---------\n");
38706b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
388c119dad6SStan Tomov magma_template<<i=0:esize>>
389c119dad6SStan Tomov       (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
390c119dad6SStan Tomov         magmablas_datomic_add( &vv[dindices[i]], uu[i]);
391c119dad6SStan Tomov       }
39206b636dbSStan Tomov #else
39306b636dbSStan Tomov       for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i];
39406b636dbSStan Tomov #endif
395c119dad6SStan Tomov     } else { // u is (elemsize x ncomp x nelem)
396c119dad6SStan Tomov       fprintf(stderr,"2 ---------\n");
397c119dad6SStan Tomov 
39882b77998SStan Tomov       if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
39906b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
40006b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
40106b636dbSStan Tomov         (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) {
40206b636dbSStan Tomov           magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d],
40306b636dbSStan Tomov                                  uu[i+iend*(d+e*dend)]);
40406b636dbSStan Tomov         }
40506b636dbSStan Tomov #else
406c119dad6SStan Tomov         for (CeedInt e = 0; e < nelem; e++)
40782b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
408c119dad6SStan Tomov             for (CeedInt i=0; i < elemsize; i++) {
409c119dad6SStan Tomov               vv[indices[i + elemsize*e]+ndof*d] +=
410c119dad6SStan Tomov                 uu[i + elemsize*(d+e*ncomp)];
411c119dad6SStan Tomov             }
41206b636dbSStan Tomov #endif
41306b636dbSStan Tomov       } else { // vv is (ncomp x ndof), column-major
41406b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
415c119dad6SStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
41606b636dbSStan Tomov         (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
41706b636dbSStan Tomov           magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]],
418c119dad6SStan Tomov                                  uu[i+iend*(d+e*dend)]);
41982b77998SStan Tomov         }
42006b636dbSStan Tomov #else
421c119dad6SStan Tomov         for (CeedInt e = 0; e < nelem; e++)
42282b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
423c119dad6SStan Tomov             for (CeedInt i=0; i < elemsize; i++) {
424c119dad6SStan Tomov               vv[d+ncomp*indices[i + elemsize*e]] +=
425c119dad6SStan Tomov                 uu[i + elemsize*(d+e*ncomp)];
426c119dad6SStan Tomov             }
42706b636dbSStan Tomov #endif
42882b77998SStan Tomov       }
42982b77998SStan Tomov     }
43082b77998SStan Tomov   }
43106b636dbSStan Tomov 
43282b77998SStan Tomov   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
43382b77998SStan Tomov   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
434c119dad6SStan Tomov 
43582b77998SStan Tomov   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
43682b77998SStan Tomov     *request = NULL;
43782b77998SStan Tomov   return 0;
43882b77998SStan Tomov }
43982b77998SStan Tomov 
44082b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) {
44182b77998SStan Tomov   CeedElemRestriction_Magma *impl = r->data;
44282b77998SStan Tomov   int ierr;
44382b77998SStan Tomov 
4441751b0a9SStan Tomov   // Free if we own the data
4451751b0a9SStan Tomov   if (impl->own_) {
4461751b0a9SStan Tomov     ierr = magma_free_pinned(impl->indices); CeedChk(ierr);
4471751b0a9SStan Tomov     ierr = magma_free(impl->dindices);       CeedChk(ierr);
4481981b6e4STzanio   } else if (impl->down_) {
449c119dad6SStan Tomov     ierr = magma_free(impl->dindices);       CeedChk(ierr);
450c119dad6SStan Tomov   }
45182b77998SStan Tomov   ierr = CeedFree(&r->data); CeedChk(ierr);
45282b77998SStan Tomov   return 0;
45382b77998SStan Tomov }
45482b77998SStan Tomov 
45582b77998SStan Tomov static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r,
45682b77998SStan Tomov     CeedMemType mtype,
45793fbe329SStan Tomov     CeedCopyMode cmode,
45893fbe329SStan Tomov     const CeedInt *indices) {
4591751b0a9SStan Tomov   int ierr, size = r->nelem*r->elemsize;
46082b77998SStan Tomov   CeedElemRestriction_Magma *impl;
46182b77998SStan Tomov 
4621751b0a9SStan Tomov   // Allocate memory for the MAGMA Restricton and initializa pointers to NULL
46382b77998SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
4641751b0a9SStan Tomov   impl->dindices = NULL;
4651751b0a9SStan Tomov   impl->indices  = NULL;
4661751b0a9SStan Tomov   impl->own_ = 0;
467c119dad6SStan Tomov   impl->down_= 0;
4681751b0a9SStan Tomov 
4691751b0a9SStan Tomov   if (mtype == CEED_MEM_HOST) {
4701751b0a9SStan Tomov     // memory is on the host; own_ = 0
47182b77998SStan Tomov     switch (cmode) {
47282b77998SStan Tomov     case CEED_COPY_VALUES:
4731751b0a9SStan Tomov       ierr = magma_malloc( (void**)&impl->dindices,
4741751b0a9SStan Tomov                            size * sizeof(CeedInt)); CeedChk(ierr);
4751751b0a9SStan Tomov       ierr = magma_malloc_pinned( (void**)&impl->indices,
4761751b0a9SStan Tomov                                   size * sizeof(CeedInt)); CeedChk(ierr);
4771751b0a9SStan Tomov       impl->own_ = 1;
4781751b0a9SStan Tomov 
47906b636dbSStan Tomov       if (indices != NULL) {
48006b636dbSStan Tomov         memcpy(impl->indices, indices, size * sizeof(indices[0]));
4811751b0a9SStan Tomov         magma_setvector(size, sizeof(CeedInt),
48206b636dbSStan Tomov                         impl->indices, 1, impl->dindices, 1);
48306b636dbSStan Tomov       }
48482b77998SStan Tomov       break;
48582b77998SStan Tomov     case CEED_OWN_POINTER:
4861751b0a9SStan Tomov       ierr = magma_malloc( (void**)&impl->dindices,
4871751b0a9SStan Tomov                            size * sizeof(CeedInt)); CeedChk(ierr);
4881751b0a9SStan Tomov       // TODO: possible problem here is if we are passed non-pinned memory;
4891751b0a9SStan Tomov       //       (as we own it, lter in destroy, we use free for pinned memory).
490137a0714SStan Tomov       impl->indices = (CeedInt *)indices;
4911751b0a9SStan Tomov       impl->own_ = 1;
4921751b0a9SStan Tomov 
493c119dad6SStan Tomov       if (indices != NULL)
4941751b0a9SStan Tomov         magma_setvector(size, sizeof(CeedInt),
4951751b0a9SStan Tomov                         indices, 1, impl->dindices, 1);
49682b77998SStan Tomov       break;
49782b77998SStan Tomov     case CEED_USE_POINTER:
498c119dad6SStan Tomov       ierr = magma_malloc( (void**)&impl->dindices,
499c119dad6SStan Tomov                            size * sizeof(CeedInt)); CeedChk(ierr);
500c119dad6SStan Tomov       magma_setvector(size, sizeof(CeedInt),
501c119dad6SStan Tomov                       indices, 1, impl->dindices, 1);
502c119dad6SStan Tomov       impl->down_ = 1;
503137a0714SStan Tomov       impl->indices  = (CeedInt *)indices;
50482b77998SStan Tomov     }
5051751b0a9SStan Tomov   } else if (mtype == CEED_MEM_DEVICE) {
5061751b0a9SStan Tomov     // memory is on the device; own = 0
5071751b0a9SStan Tomov     switch (cmode) {
5081751b0a9SStan Tomov     case CEED_COPY_VALUES:
5091751b0a9SStan Tomov       ierr = magma_malloc( (void**)&impl->dindices,
5101751b0a9SStan Tomov                            size * sizeof(CeedInt)); CeedChk(ierr);
5111751b0a9SStan Tomov       ierr = magma_malloc_pinned( (void**)&impl->indices,
5121751b0a9SStan Tomov                                   size * sizeof(CeedInt)); CeedChk(ierr);
5131751b0a9SStan Tomov       impl->own_ = 1;
5141751b0a9SStan Tomov 
5151751b0a9SStan Tomov       if (indices)
5161751b0a9SStan Tomov         magma_copyvector(size, sizeof(CeedInt),
5171751b0a9SStan Tomov                          indices, 1, impl->dindices, 1);
5181751b0a9SStan Tomov       break;
5191751b0a9SStan Tomov     case CEED_OWN_POINTER:
520137a0714SStan Tomov       impl->dindices = (CeedInt *)indices;
5211751b0a9SStan Tomov       ierr = magma_malloc_pinned( (void**)&impl->indices,
5221751b0a9SStan Tomov                                   size * sizeof(CeedInt)); CeedChk(ierr);
5231751b0a9SStan Tomov       impl->own_ = 1;
5241751b0a9SStan Tomov 
5251751b0a9SStan Tomov       break;
5261751b0a9SStan Tomov     case CEED_USE_POINTER:
527137a0714SStan Tomov       impl->dindices = (CeedInt *)indices;
5281751b0a9SStan Tomov       impl->indices  = NULL;
5291751b0a9SStan Tomov     }
5301751b0a9SStan Tomov 
5311751b0a9SStan Tomov   } else
5321751b0a9SStan Tomov     return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported");
5331751b0a9SStan Tomov 
53482b77998SStan Tomov   r->data    = impl;
53582b77998SStan Tomov   r->Apply   = CeedElemRestrictionApply_Magma;
53682b77998SStan Tomov   r->Destroy = CeedElemRestrictionDestroy_Magma;
5371751b0a9SStan Tomov 
53882b77998SStan Tomov   return 0;
53982b77998SStan Tomov }
54082b77998SStan Tomov 
54182b77998SStan Tomov // Contracts on the middle index
54282b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc
54382b77998SStan Tomov // TRANSPOSE:   V_ajc = T_bj U_abc
54482b77998SStan Tomov // If Add != 0, "=" is replaced by "+="
54582b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed,
54682b77998SStan Tomov                                     CeedInt A, CeedInt B, CeedInt C, CeedInt J,
54782b77998SStan Tomov                                     const CeedScalar *t, CeedTransposeMode tmode,
54882b77998SStan Tomov                                     const CeedInt Add,
54982b77998SStan Tomov                                     const CeedScalar *u, CeedScalar *v) {
55038612b08SStan Tomov #ifdef USE_MAGMA_BATCH
55138612b08SStan Tomov   magma_dtensor_contract(ceed, A, B, C, J, t, tmode, Add, u, v);
55238612b08SStan Tomov #else
55382b77998SStan Tomov   CeedInt tstride0 = B, tstride1 = 1;
55482b77998SStan Tomov   if (tmode == CEED_TRANSPOSE) {
55582b77998SStan Tomov     tstride0 = 1; tstride1 = J;
55682b77998SStan Tomov   }
55738612b08SStan Tomov   CeedDebug("\033[31m[CeedTensorContract] A=%d, J=%d, C=%d, B=%d: %d %d %d",
55838612b08SStan Tomov             A,J,C,B,A*J*B*C, C*J*A, C*B*A);
55982b77998SStan Tomov   for (CeedInt a=0; a<A; a++) {
56082b77998SStan Tomov     for (CeedInt j=0; j<J; j++) {
56182b77998SStan Tomov       if (!Add) {
56282b77998SStan Tomov         for (CeedInt c=0; c<C; c++)
56382b77998SStan Tomov           v[(a*J+j)*C+c] = 0;
56482b77998SStan Tomov       }
56582b77998SStan Tomov       for (CeedInt b=0; b<B; b++) {
56682b77998SStan Tomov         for (CeedInt c=0; c<C; c++) {
56782b77998SStan Tomov           v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c];
56882b77998SStan Tomov         }
56982b77998SStan Tomov       }
57082b77998SStan Tomov     }
57182b77998SStan Tomov   }
57238612b08SStan Tomov #endif
57382b77998SStan Tomov   return 0;
57482b77998SStan Tomov }
57582b77998SStan Tomov 
57682b77998SStan Tomov static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode,
57782b77998SStan Tomov                                 CeedEvalMode emode,
57882b77998SStan Tomov                                 const CeedScalar *u, CeedScalar *v) {
57982b77998SStan Tomov   int ierr;
58082b77998SStan Tomov   const CeedInt dim = basis->dim;
58182b77998SStan Tomov   const CeedInt ndof = basis->ndof;
58282b77998SStan Tomov   const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim);
58382b77998SStan Tomov   const CeedInt add = (tmode == CEED_TRANSPOSE);
58482b77998SStan Tomov 
58538612b08SStan Tomov   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d",
58638612b08SStan Tomov             ndof*CeedPowInt(basis->P1d, dim));
58738612b08SStan Tomov 
58882b77998SStan Tomov   if (tmode == CEED_TRANSPOSE) {
58982b77998SStan Tomov     const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim);
59082b77998SStan Tomov     for (CeedInt i = 0; i < vsize; i++)
59182b77998SStan Tomov       v[i] = (CeedScalar) 0;
59282b77998SStan Tomov   }
59382b77998SStan Tomov   if (emode & CEED_EVAL_INTERP) {
59482b77998SStan Tomov     CeedInt P = basis->P1d, Q = basis->Q1d;
59582b77998SStan Tomov     if (tmode == CEED_TRANSPOSE) {
59682b77998SStan Tomov       P = basis->Q1d; Q = basis->P1d;
59782b77998SStan Tomov     }
59882b77998SStan Tomov     CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
59982b77998SStan Tomov     CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
60038612b08SStan Tomov     CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d",
60138612b08SStan Tomov               ndof*Q*CeedPowInt(P>Q?P:Q, dim-1));
60282b77998SStan Tomov     for (CeedInt d=0; d<dim; d++) {
60382b77998SStan Tomov       ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d,
60482b77998SStan Tomov                                       tmode, add&&(d==dim-1),
60582b77998SStan Tomov                                       d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
60682b77998SStan Tomov       CeedChk(ierr);
60782b77998SStan Tomov       pre /= P;
60882b77998SStan Tomov       post *= Q;
60982b77998SStan Tomov     }
61082b77998SStan Tomov     if (tmode == CEED_NOTRANSPOSE) {
61182b77998SStan Tomov       v += nqpt;
61282b77998SStan Tomov     } else {
61382b77998SStan Tomov       u += nqpt;
61482b77998SStan Tomov     }
61582b77998SStan Tomov   }
61682b77998SStan Tomov   if (emode & CEED_EVAL_GRAD) {
61782b77998SStan Tomov     CeedInt P = basis->P1d, Q = basis->Q1d;
61882b77998SStan Tomov     // In CEED_NOTRANSPOSE mode:
61982b77998SStan Tomov     // u is (P^dim x nc), column-major layout (nc = ndof)
62082b77998SStan Tomov     // v is (Q^dim x nc x dim), column-major layout (nc = ndof)
62182b77998SStan Tomov     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
62282b77998SStan Tomov     if (tmode == CEED_TRANSPOSE) {
62382b77998SStan Tomov       P = basis->Q1d, Q = basis->P1d;
62482b77998SStan Tomov     }
62582b77998SStan Tomov     CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
62638612b08SStan Tomov     CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d",
62738612b08SStan Tomov               ndof*Q*CeedPowInt(P>Q?P:Q, dim-1));
62882b77998SStan Tomov     for (CeedInt p = 0; p < dim; p++) {
62982b77998SStan Tomov       CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
63082b77998SStan Tomov       for (CeedInt d=0; d<dim; d++) {
63182b77998SStan Tomov         ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q,
63282b77998SStan Tomov                                         (p==d)?basis->grad1d:basis->interp1d,
63382b77998SStan Tomov                                         tmode, add&&(d==dim-1),
63482b77998SStan Tomov                                         d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
63582b77998SStan Tomov         CeedChk(ierr);
63682b77998SStan Tomov         pre /= P;
63782b77998SStan Tomov         post *= Q;
63882b77998SStan Tomov       }
63982b77998SStan Tomov       if (tmode == CEED_NOTRANSPOSE) {
64082b77998SStan Tomov         v += nqpt;
64182b77998SStan Tomov       } else {
64282b77998SStan Tomov         u += nqpt;
64382b77998SStan Tomov       }
64482b77998SStan Tomov     }
64582b77998SStan Tomov   }
64682b77998SStan Tomov   if (emode & CEED_EVAL_WEIGHT) {
64782b77998SStan Tomov     if (tmode == CEED_TRANSPOSE)
64882b77998SStan Tomov       return CeedError(basis->ceed, 1,
64982b77998SStan Tomov                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
65082b77998SStan Tomov     CeedInt Q = basis->Q1d;
65182b77998SStan Tomov     for (CeedInt d=0; d<dim; d++) {
65282b77998SStan Tomov       CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d);
65382b77998SStan Tomov       for (CeedInt i=0; i<pre; i++) {
65482b77998SStan Tomov         for (CeedInt j=0; j<Q; j++) {
65582b77998SStan Tomov           for (CeedInt k=0; k<post; k++) {
65682b77998SStan Tomov             v[(i*Q + j)*post + k] = basis->qweight1d[j]
65782b77998SStan Tomov                                     * (d == 0 ? 1 : v[(i*Q + j)*post + k]);
65882b77998SStan Tomov           }
65982b77998SStan Tomov         }
66082b77998SStan Tomov       }
66182b77998SStan Tomov     }
66282b77998SStan Tomov   }
66382b77998SStan Tomov   return 0;
66482b77998SStan Tomov }
66582b77998SStan Tomov 
66682b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) {
66782b77998SStan Tomov   return 0;
66882b77998SStan Tomov }
66982b77998SStan Tomov 
67082b77998SStan Tomov static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d,
67182b77998SStan Tomov     CeedInt Q1d, const CeedScalar *interp1d,
67282b77998SStan Tomov     const CeedScalar *grad1d,
67382b77998SStan Tomov     const CeedScalar *qref1d,
67482b77998SStan Tomov     const CeedScalar *qweight1d,
67582b77998SStan Tomov     CeedBasis basis) {
67682b77998SStan Tomov   basis->Apply = CeedBasisApply_Magma;
67782b77998SStan Tomov   basis->Destroy = CeedBasisDestroy_Magma;
67882b77998SStan Tomov   return 0;
67982b77998SStan Tomov }
68082b77998SStan Tomov 
68182b77998SStan Tomov static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q,
68282b77998SStan Tomov                                     const CeedScalar *const *u,
68382b77998SStan Tomov                                     CeedScalar *const *v) {
68482b77998SStan Tomov   int ierr;
68582b77998SStan Tomov   ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr);
68682b77998SStan Tomov   return 0;
68782b77998SStan Tomov }
68882b77998SStan Tomov 
68982b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) {
69082b77998SStan Tomov   return 0;
69182b77998SStan Tomov }
69282b77998SStan Tomov 
69382b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) {
69482b77998SStan Tomov   qf->Apply = CeedQFunctionApply_Magma;
69582b77998SStan Tomov   qf->Destroy = CeedQFunctionDestroy_Magma;
69682b77998SStan Tomov   return 0;
69782b77998SStan Tomov }
69882b77998SStan Tomov 
69982b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) {
70082b77998SStan Tomov   CeedOperator_Magma *impl = op->data;
70182b77998SStan Tomov   int ierr;
70282b77998SStan Tomov 
70382b77998SStan Tomov   ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr);
70482b77998SStan Tomov   ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr);
70582b77998SStan Tomov   ierr = CeedFree(&op->data); CeedChk(ierr);
70682b77998SStan Tomov   return 0;
70782b77998SStan Tomov }
70882b77998SStan Tomov 
70982b77998SStan Tomov static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata,
71082b77998SStan Tomov                                    CeedVector ustate,
71182b77998SStan Tomov                                    CeedVector residual, CeedRequest *request) {
71282b77998SStan Tomov   CeedOperator_Magma *impl = op->data;
71382b77998SStan Tomov   CeedVector etmp;
71482b77998SStan Tomov   CeedInt Q;
71582b77998SStan Tomov   const CeedInt nc = op->basis->ndof, dim = op->basis->dim;
71682b77998SStan Tomov   CeedScalar *Eu;
71782b77998SStan Tomov   char *qd;
71882b77998SStan Tomov   int ierr;
71982b77998SStan Tomov   CeedTransposeMode lmode = CEED_NOTRANSPOSE;
72082b77998SStan Tomov 
72182b77998SStan Tomov   if (!impl->etmp) {
72282b77998SStan Tomov     ierr = CeedVectorCreate(op->ceed,
72382b77998SStan Tomov                             nc * op->Erestrict->nelem * op->Erestrict->elemsize,
72482b77998SStan Tomov                             &impl->etmp); CeedChk(ierr);
72582b77998SStan Tomov     // etmp is allocated when CeedVectorGetArray is called below
72682b77998SStan Tomov   }
72782b77998SStan Tomov   etmp = impl->etmp;
72882b77998SStan Tomov   if (op->qf->inmode & ~CEED_EVAL_WEIGHT) {
72982b77998SStan Tomov     ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE,
73082b77998SStan Tomov                                     nc, lmode, ustate, etmp,
73182b77998SStan Tomov                                     CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
73282b77998SStan Tomov   }
73382b77998SStan Tomov   ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
73482b77998SStan Tomov   ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr);
73582b77998SStan Tomov   ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd);
73682b77998SStan Tomov   CeedChk(ierr);
73738612b08SStan Tomov 
73882b77998SStan Tomov   for (CeedInt e=0; e<op->Erestrict->nelem; e++) {
73982b77998SStan Tomov     CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0};
74082b77998SStan Tomov     const CeedScalar *in[5] = {0,0,0,0,0};
74182b77998SStan Tomov     // TODO: quadrature weights can be computed just once
74238612b08SStan Tomov     CeedDebug("\033[11m[CeedOperatorApply_Magma] e=%d: Eu+%d, %d",
74338612b08SStan Tomov               e, e*op->Erestrict->elemsize*nc, Q*nc*(dim+2));
74482b77998SStan Tomov     ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode,
74582b77998SStan Tomov                           &Eu[e*op->Erestrict->elemsize*nc], BEu);
74682b77998SStan Tomov     CeedChk(ierr);
74782b77998SStan Tomov     CeedScalar *u_ptr = BEu, *v_ptr = BEv;
74882b77998SStan Tomov     if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; }
74982b77998SStan Tomov     if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; }
75082b77998SStan Tomov     if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; }
75182b77998SStan Tomov     if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; }
75282b77998SStan Tomov     if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; }
75382b77998SStan Tomov     ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out);
75482b77998SStan Tomov     CeedChk(ierr);
75538612b08SStan Tomov     CeedDebug("\033[31m[CeedOperatorApply_Magma] e=%d: ",e);
75682b77998SStan Tomov     ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv,
75782b77998SStan Tomov                           &Eu[e*op->Erestrict->elemsize*nc]);
75882b77998SStan Tomov     CeedChk(ierr);
75982b77998SStan Tomov   }
76082b77998SStan Tomov   ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr);
76106b636dbSStan Tomov   // qdata must be restored
76206b636dbSStan Tomov   ierr = CeedVectorRestoreArray(qdata, (CeedScalar**)&qd); CeedChk(ierr);
76382b77998SStan Tomov   if (residual) {
76482b77998SStan Tomov     CeedScalar *res;
76582b77998SStan Tomov     CeedVectorGetArray(residual, CEED_MEM_HOST, &res);
76682b77998SStan Tomov     for (int i = 0; i < residual->length; i++)
76782b77998SStan Tomov       res[i] = (CeedScalar)0;
76806b636dbSStan Tomov     // residual must be restored
76906b636dbSStan Tomov     CeedVectorRestoreArray(residual, &res);
77082b77998SStan Tomov     ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE,
77182b77998SStan Tomov                                     nc, lmode, etmp, residual,
77282b77998SStan Tomov                                     CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
77382b77998SStan Tomov   }
77482b77998SStan Tomov   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
77582b77998SStan Tomov     *request = NULL;
77682b77998SStan Tomov   return 0;
77782b77998SStan Tomov }
77882b77998SStan Tomov 
77982b77998SStan Tomov static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) {
78082b77998SStan Tomov   CeedOperator_Magma *impl = op->data;
78182b77998SStan Tomov   int ierr;
78282b77998SStan Tomov 
78382b77998SStan Tomov   if (!impl->qdata) {
78482b77998SStan Tomov     CeedInt Q;
78582b77998SStan Tomov     ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
78682b77998SStan Tomov     ierr = CeedVectorCreate(op->ceed,
78782b77998SStan Tomov                             op->Erestrict->nelem * Q
78882b77998SStan Tomov                             * op->qf->qdatasize / sizeof(CeedScalar),
78982b77998SStan Tomov                             &impl->qdata); CeedChk(ierr);
79082b77998SStan Tomov   }
79182b77998SStan Tomov   *qdata = impl->qdata;
79282b77998SStan Tomov   return 0;
79382b77998SStan Tomov }
79482b77998SStan Tomov 
79582b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) {
79682b77998SStan Tomov   CeedOperator_Magma *impl;
79782b77998SStan Tomov   int ierr;
79882b77998SStan Tomov 
79982b77998SStan Tomov   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
80082b77998SStan Tomov   op->data = impl;
80182b77998SStan Tomov   op->Destroy  = CeedOperatorDestroy_Magma;
80282b77998SStan Tomov   op->Apply    = CeedOperatorApply_Magma;
80382b77998SStan Tomov   op->GetQData = CeedOperatorGetQData_Magma;
80482b77998SStan Tomov   return 0;
80582b77998SStan Tomov }
80682b77998SStan Tomov 
80782b77998SStan Tomov // *****************************************************************************
80882b77998SStan Tomov // * INIT
80982b77998SStan Tomov // *****************************************************************************
81082b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) {
8111dc2661bSVeselin Dobrev   int ierr;
8122a847359SStan Tomov   if (strcmp(resource, "/gpu/magma"))
81382b77998SStan Tomov     return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource);
81493fbe329SStan Tomov 
8151dc2661bSVeselin Dobrev   ierr = magma_init();
8161dc2661bSVeselin Dobrev   if (ierr) return CeedError(ceed, 1, "error in magma_init(): %d\n", ierr);
81793fbe329SStan Tomov   //magma_print_environment();
81893fbe329SStan Tomov 
81982b77998SStan Tomov   ceed->VecCreate = CeedVectorCreate_Magma;
82082b77998SStan Tomov   ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma;
82182b77998SStan Tomov   ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma;
82282b77998SStan Tomov   ceed->QFunctionCreate = CeedQFunctionCreate_Magma;
82382b77998SStan Tomov   ceed->OperatorCreate = CeedOperatorCreate_Magma;
82482b77998SStan Tomov   return 0;
82582b77998SStan Tomov }
82682b77998SStan Tomov 
82782b77998SStan Tomov // *****************************************************************************
82882b77998SStan Tomov // * REGISTER
82982b77998SStan Tomov // *****************************************************************************
83082b77998SStan Tomov __attribute__((constructor))
83182b77998SStan Tomov static void Register(void) {
832*44951fc6Sjeremylt   CeedRegister("/gpu/magma", CeedInit_Magma, 20);
83382b77998SStan Tomov }
834