xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.c (revision c44a85f41cd2820e6e38bc7468b238fa89afb40f)
182b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
282b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
382b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details.
482b77998SStan Tomov //
582b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software
682b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral
782b77998SStan Tomov // element discretizations for exascale applications. For more information and
882b77998SStan Tomov // source code availability see http://github.com/ceed.
982b77998SStan Tomov //
1082b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1182b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office
1282b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for
1382b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including
1482b77998SStan Tomov // software, applications, hardware, advanced system engineering and early
1582b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative.
1682b77998SStan Tomov 
1738612b08SStan Tomov #include "ceed-magma.h"
185a9ca9adSVeselin Dobrev #include <string.h>
1982b77998SStan Tomov 
2082b77998SStan Tomov typedef struct {
2182b77998SStan Tomov   CeedScalar *array;
2293fbe329SStan Tomov   CeedScalar *darray;
23e2900954SStan Tomov   int  own_;
24c119dad6SStan Tomov   int down_;
2582b77998SStan Tomov } CeedVector_Magma;
2682b77998SStan Tomov 
2782b77998SStan Tomov typedef struct {
281751b0a9SStan Tomov   CeedInt *indices;
291751b0a9SStan Tomov   CeedInt *dindices;
301751b0a9SStan Tomov   int  own_;
31c119dad6SStan Tomov   int down_;            // cover a case where we own Device memory
3282b77998SStan Tomov } CeedElemRestriction_Magma;
3382b77998SStan Tomov 
3482b77998SStan Tomov typedef struct {
353bfcb0deSjeremylt   CeedVector
363bfcb0deSjeremylt   *evecs;   /// E-vectors needed to apply operator (input followed by outputs)
373bfcb0deSjeremylt   CeedScalar **edata;
383bfcb0deSjeremylt   CeedScalar **qdata; /// Inputs followed by outputs
393bfcb0deSjeremylt   CeedScalar
403bfcb0deSjeremylt   **qdata_alloc; /// Allocated quadrature data arrays (to be freed by us)
413bfcb0deSjeremylt   CeedScalar **indata;
423bfcb0deSjeremylt   CeedScalar **outdata;
433bfcb0deSjeremylt   CeedInt    numein;
443bfcb0deSjeremylt   CeedInt    numeout;
453bfcb0deSjeremylt   CeedInt    numqin;
463bfcb0deSjeremylt   CeedInt    numqout;
4782b77998SStan Tomov } CeedOperator_Magma;
4882b77998SStan Tomov 
4993fbe329SStan Tomov // *****************************************************************************
5093fbe329SStan Tomov // * Initialize vector vec (after free mem) with values from array based on cmode
5193fbe329SStan Tomov // *   CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal
5293fbe329SStan Tomov // *                     to array, and data is copied (not store passed pointer)
5393fbe329SStan Tomov // *   CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array
5493fbe329SStan Tomov // *   CEED_USE_POINTER: vec->data->array = array (can modify; no ownership)
5593fbe329SStan Tomov // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE
5693fbe329SStan Tomov // *****************************************************************************
5782b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype,
5882b77998SStan Tomov                                     CeedCopyMode cmode, CeedScalar *array) {
5982b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
6082b77998SStan Tomov   int ierr;
6182b77998SStan Tomov 
62c119dad6SStan Tomov   // If own data, free the "old" data, e.g., as it may be of different size
63e2900954SStan Tomov   if (impl->own_) {
64e2900954SStan Tomov     magma_free( impl->darray );
65e2900954SStan Tomov     magma_free_pinned( impl->array );
66e2900954SStan Tomov     impl->darray = NULL;
67e2900954SStan Tomov     impl->array  = NULL;
68e2900954SStan Tomov     impl->own_ = 0;
69c119dad6SStan Tomov     impl->down_= 0;
70e2900954SStan Tomov   }
7193fbe329SStan Tomov 
72e2900954SStan Tomov   if (mtype == CEED_MEM_HOST) {
73e2900954SStan Tomov     // memory is on the host; own_ = 0
7482b77998SStan Tomov     switch (cmode) {
7582b77998SStan Tomov     case CEED_COPY_VALUES:
7693fbe329SStan Tomov       ierr = magma_malloc( (void**)&impl->darray,
77e2900954SStan Tomov                            vec->length * sizeof(CeedScalar)); CeedChk(ierr);
78e2900954SStan Tomov       ierr = magma_malloc_pinned( (void**)&impl->array,
79e2900954SStan Tomov                                   vec->length * sizeof(CeedScalar)); CeedChk(ierr);
80e2900954SStan Tomov       impl->own_ = 1;
81e2900954SStan Tomov 
82c119dad6SStan Tomov       if (array != NULL)
8393fbe329SStan Tomov         magma_setvector(vec->length, sizeof(array[0]),
8493fbe329SStan Tomov                         array, 1, impl->darray, 1);
8582b77998SStan Tomov       break;
8682b77998SStan Tomov     case CEED_OWN_POINTER:
8793fbe329SStan Tomov       ierr = magma_malloc( (void**)&impl->darray,
88e2900954SStan Tomov                            vec->length * sizeof(CeedScalar)); CeedChk(ierr);
891751b0a9SStan Tomov       // TODO: possible problem here is if we are passed non-pinned memory;
901751b0a9SStan Tomov       //       (as we own it, lter in destroy, we use free for pinned memory).
91e2900954SStan Tomov       impl->array = array;
92e2900954SStan Tomov       impl->own_ = 1;
93e2900954SStan Tomov 
94c119dad6SStan Tomov       if (array != NULL)
9593fbe329SStan Tomov         magma_setvector(vec->length, sizeof(array[0]),
9693fbe329SStan Tomov                         array, 1, impl->darray, 1);
9782b77998SStan Tomov       break;
9882b77998SStan Tomov     case CEED_USE_POINTER:
99c119dad6SStan Tomov       ierr = magma_malloc( (void**)&impl->darray,
100c119dad6SStan Tomov                            vec->length * sizeof(CeedScalar)); CeedChk(ierr);
101c119dad6SStan Tomov       magma_setvector(vec->length, sizeof(array[0]),
102c119dad6SStan Tomov                       array, 1, impl->darray, 1);
10306b636dbSStan Tomov 
10406b636dbSStan Tomov       impl->down_  = 1;
10582b77998SStan Tomov       impl->array  = array;
10682b77998SStan Tomov     }
107e2900954SStan Tomov   } else if (mtype == CEED_MEM_DEVICE) {
108e2900954SStan Tomov     // memory is on the device; own = 0
109e2900954SStan Tomov     switch (cmode) {
110e2900954SStan Tomov     case CEED_COPY_VALUES:
111e2900954SStan Tomov       ierr = magma_malloc( (void**)&impl->darray,
112e2900954SStan Tomov                            vec->length * sizeof(CeedScalar)); CeedChk(ierr);
113e2900954SStan Tomov       ierr = magma_malloc_pinned( (void**)&impl->array,
114e2900954SStan Tomov                                   vec->length * sizeof(CeedScalar)); CeedChk(ierr);
115e2900954SStan Tomov       impl->own_ = 1;
116e2900954SStan Tomov 
117e2900954SStan Tomov       if (array)
118e2900954SStan Tomov         magma_copyvector(vec->length, sizeof(array[0]),
119e2900954SStan Tomov                          array, 1, impl->darray, 1);
12006b636dbSStan Tomov       else
12106b636dbSStan Tomov         // t30 assumes allocation initializes with 0s
12206b636dbSStan Tomov         magma_setvector(vec->length, sizeof(array[0]),
12306b636dbSStan Tomov                         impl->array, 1, impl->darray, 1);
124e2900954SStan Tomov       break;
125e2900954SStan Tomov     case CEED_OWN_POINTER:
126e2900954SStan Tomov       impl->darray = array;
127e2900954SStan Tomov       ierr = magma_malloc_pinned( (void**)&impl->array,
128e2900954SStan Tomov                                   vec->length * sizeof(CeedScalar)); CeedChk(ierr);
129e2900954SStan Tomov       impl->own_ = 1;
130e2900954SStan Tomov 
131e2900954SStan Tomov       break;
132e2900954SStan Tomov     case CEED_USE_POINTER:
133e2900954SStan Tomov       impl->darray = array;
134e2900954SStan Tomov       impl->array  = NULL;
135e2900954SStan Tomov     }
136e2900954SStan Tomov 
137e2900954SStan Tomov   } else
138e2900954SStan Tomov     return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported");
139e2900954SStan Tomov 
14082b77998SStan Tomov   return 0;
14182b77998SStan Tomov }
14282b77998SStan Tomov 
14393fbe329SStan Tomov // *****************************************************************************
144e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE)
14593fbe329SStan Tomov // *****************************************************************************
14682b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype,
14782b77998SStan Tomov                                     CeedScalar **array) {
14882b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
14982b77998SStan Tomov   int ierr;
15082b77998SStan Tomov 
151e2900954SStan Tomov   if (mtype == CEED_MEM_HOST) {
152c119dad6SStan Tomov     if (impl->own_) {
153e2900954SStan Tomov       // data is owned so GPU had the most up-to-date version; copy it
15406b636dbSStan Tomov       // TTT - apparantly it doesn't have most up to date data
155e2900954SStan Tomov       magma_getvector(vec->length, sizeof(*array[0]),
156e2900954SStan Tomov                       impl->darray, 1, impl->array, 1);
15706b636dbSStan Tomov       CeedDebug("\033[31m[CeedVectorGetArray_Magma]");
15806b636dbSStan Tomov       //fprintf(stderr,"rrrrrrrrrrrrrrr\n");
159c119dad6SStan Tomov     } else if (impl->array == NULL) {
160c119dad6SStan Tomov       // Vector doesn't own the data and was set on GPU
161c119dad6SStan Tomov       if (impl->darray == NULL) {
162c119dad6SStan Tomov         // call was made just to allocate memory
163c119dad6SStan Tomov         ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
164c119dad6SStan Tomov         CeedChk(ierr);
165c119dad6SStan Tomov       } else
166c119dad6SStan Tomov         return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST");
16782b77998SStan Tomov     }
16882b77998SStan Tomov     *array = impl->array;
169e2900954SStan Tomov   } else if (mtype == CEED_MEM_DEVICE) {
170c119dad6SStan Tomov     if (impl->darray == NULL) {
171c119dad6SStan Tomov       // Vector doesn't own the data and was set on the CPU
172c119dad6SStan Tomov       if (impl->array == NULL) {
173c119dad6SStan Tomov         // call was made just to allocate memory
174e2900954SStan Tomov         ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
175e2900954SStan Tomov         CeedChk(ierr);
176c119dad6SStan Tomov       } else
177c119dad6SStan Tomov         return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE");
178e2900954SStan Tomov     }
179e2900954SStan Tomov     *array = impl->darray;
180e2900954SStan Tomov   } else
181e2900954SStan Tomov     return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory");
182e2900954SStan Tomov 
18382b77998SStan Tomov   return 0;
18482b77998SStan Tomov }
18582b77998SStan Tomov 
18693fbe329SStan Tomov // *****************************************************************************
187e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it
18893fbe329SStan Tomov // *****************************************************************************
18982b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype,
19082b77998SStan Tomov                                         const CeedScalar **array) {
19182b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
19282b77998SStan Tomov   int ierr;
19382b77998SStan Tomov 
194e2900954SStan Tomov   if (mtype == CEED_MEM_HOST) {
195c119dad6SStan Tomov     if (impl->own_) {
196e2900954SStan Tomov       // data is owned so GPU had the most up-to-date version; copy it
197e2900954SStan Tomov       magma_getvector(vec->length, sizeof(*array[0]),
198e2900954SStan Tomov                       impl->darray, 1, impl->array, 1);
199c119dad6SStan Tomov     } else if (impl->array == NULL) {
200c119dad6SStan Tomov       // Vector doesn't own the data and was set on GPU
201c119dad6SStan Tomov       if (impl->darray == NULL) {
202c119dad6SStan Tomov         // call was made just to allocate memory
203c119dad6SStan Tomov         ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
204c119dad6SStan Tomov         CeedChk(ierr);
205c119dad6SStan Tomov       } else
206c119dad6SStan Tomov         return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST");
20782b77998SStan Tomov     }
20882b77998SStan Tomov     *array = impl->array;
209e2900954SStan Tomov   } else if (mtype == CEED_MEM_DEVICE) {
210c119dad6SStan Tomov     if (impl->darray == NULL) {
211c119dad6SStan Tomov       // Vector doesn't own the data and was set on the CPU
212c119dad6SStan Tomov       if (impl->array == NULL) {
213c119dad6SStan Tomov         // call was made just to allocate memory
214e2900954SStan Tomov         ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
215e2900954SStan Tomov         CeedChk(ierr);
216c119dad6SStan Tomov       } else
217c119dad6SStan Tomov         return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE");
218e2900954SStan Tomov     }
219e2900954SStan Tomov     *array = impl->darray;
220e2900954SStan Tomov   } else
221e2900954SStan Tomov     return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory");
222e2900954SStan Tomov 
22382b77998SStan Tomov   return 0;
22482b77998SStan Tomov }
22582b77998SStan Tomov 
22693fbe329SStan Tomov // *****************************************************************************
227e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST
228e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if
229e2900954SStan Tomov // * it was GPU memory we would not call this routine at all.
23093fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values
23193fbe329SStan Tomov // * from vec and possibly modified them.
23293fbe329SStan Tomov // *****************************************************************************
23382b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) {
23493fbe329SStan Tomov   CeedVector_Magma *impl = vec->data;
23593fbe329SStan Tomov 
236c119dad6SStan Tomov   // Check if the array is a CPU pointer
237c119dad6SStan Tomov   if (*array == impl->array) {
238c119dad6SStan Tomov     // Update device, if the device pointer is not NULL
239c119dad6SStan Tomov     if (impl->darray != NULL) {
24093fbe329SStan Tomov       magma_setvector(vec->length, sizeof(*array[0]),
24193fbe329SStan Tomov                       *array, 1, impl->darray, 1);
2425a9ca9adSVeselin Dobrev     } else {
2435a9ca9adSVeselin Dobrev       // nothing to do (case of CPU use pointer)
2445a9ca9adSVeselin Dobrev     }
245c119dad6SStan Tomov 
246c119dad6SStan Tomov   } else if (impl->down_) {
247c119dad6SStan Tomov     // nothing to do if array is on GPU, except if down_=1(case CPU use pointer)
248c119dad6SStan Tomov     magma_getvector(vec->length, sizeof(*array[0]),
249c119dad6SStan Tomov                     impl->darray, 1, impl->array, 1);
250c119dad6SStan Tomov   }
251e2900954SStan Tomov 
25282b77998SStan Tomov   *array = NULL;
25382b77998SStan Tomov   return 0;
25482b77998SStan Tomov }
25582b77998SStan Tomov 
25693fbe329SStan Tomov // *****************************************************************************
257e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST
258e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if
259e2900954SStan Tomov // * it was GPU memory we would not call this routine at all.
26093fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values
26193fbe329SStan Tomov // * from vec to only read them; in this case vec may have been modified meanwhile
26293fbe329SStan Tomov // * and needs to be restored here.
26393fbe329SStan Tomov // *****************************************************************************
26482b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec,
26582b77998SStan Tomov     const CeedScalar **array) {
26693fbe329SStan Tomov   CeedVector_Magma *impl = vec->data;
26793fbe329SStan Tomov 
268c119dad6SStan Tomov   // Check if the array is a CPU pointer
269c119dad6SStan Tomov   if (*array == impl->array) {
270c119dad6SStan Tomov     // Update device, if the device pointer is not NULL
2715a9ca9adSVeselin Dobrev     if (impl->darray != NULL) {
27293fbe329SStan Tomov       magma_setvector(vec->length, sizeof(*array[0]),
27393fbe329SStan Tomov                       *array, 1, impl->darray, 1);
2745a9ca9adSVeselin Dobrev     } else {
2755a9ca9adSVeselin Dobrev       // nothing to do (case of CPU use pointer)
2765a9ca9adSVeselin Dobrev     }
277c119dad6SStan Tomov 
278c119dad6SStan Tomov   } else if (impl->down_) {
279c119dad6SStan Tomov     // nothing to do if array is on GPU, except if down_=1(case CPU use pointer)
280c119dad6SStan Tomov     magma_getvector(vec->length, sizeof(*array[0]),
281c119dad6SStan Tomov                     impl->darray, 1, impl->array, 1);
282c119dad6SStan Tomov   }
283e2900954SStan Tomov 
28482b77998SStan Tomov   *array = NULL;
28582b77998SStan Tomov   return 0;
28682b77998SStan Tomov }
28782b77998SStan Tomov 
28882b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) {
28982b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
29082b77998SStan Tomov   int ierr;
29182b77998SStan Tomov 
292e2900954SStan Tomov   // Free if we own the data
293e2900954SStan Tomov   if (impl->own_) {
294e2900954SStan Tomov     ierr = magma_free_pinned(impl->array); CeedChk(ierr);
29593fbe329SStan Tomov     ierr = magma_free(impl->darray);       CeedChk(ierr);
296c119dad6SStan Tomov   } else if (impl->down_) {
297c119dad6SStan Tomov     ierr = magma_free(impl->darray);       CeedChk(ierr);
298e2900954SStan Tomov   }
29982b77998SStan Tomov   ierr = CeedFree(&vec->data); CeedChk(ierr);
30082b77998SStan Tomov   return 0;
30182b77998SStan Tomov }
30282b77998SStan Tomov 
30393fbe329SStan Tomov // *****************************************************************************
30493fbe329SStan Tomov // * Create vector vec of size n
30593fbe329SStan Tomov // *****************************************************************************
30682b77998SStan Tomov static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) {
30782b77998SStan Tomov   CeedVector_Magma *impl;
30882b77998SStan Tomov   int ierr;
30982b77998SStan Tomov 
31082b77998SStan Tomov   vec->SetArray = CeedVectorSetArray_Magma;
31182b77998SStan Tomov   vec->GetArray = CeedVectorGetArray_Magma;
31282b77998SStan Tomov   vec->GetArrayRead = CeedVectorGetArrayRead_Magma;
31382b77998SStan Tomov   vec->RestoreArray = CeedVectorRestoreArray_Magma;
31482b77998SStan Tomov   vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma;
31582b77998SStan Tomov   vec->Destroy = CeedVectorDestroy_Magma;
31682b77998SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
31793fbe329SStan Tomov   impl->darray = NULL;
318e2900954SStan Tomov   impl->array  = NULL;
319e2900954SStan Tomov   impl->own_ = 0;
320c119dad6SStan Tomov   impl->down_= 0;
32182b77998SStan Tomov   vec->data = impl;
32282b77998SStan Tomov   return 0;
32382b77998SStan Tomov }
32482b77998SStan Tomov 
3253bfcb0deSjeremylt 
32693fbe329SStan Tomov // *****************************************************************************
32793fbe329SStan Tomov // * Apply restriction operator r to u: v = r(rmode) u
32893fbe329SStan Tomov // *****************************************************************************
32982b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r,
3303bfcb0deSjeremylt     CeedTransposeMode tmode,
33182b77998SStan Tomov     CeedTransposeMode lmode, CeedVector u,
33282b77998SStan Tomov     CeedVector v, CeedRequest *request) {
33382b77998SStan Tomov   CeedElemRestriction_Magma *impl = r->data;
33482b77998SStan Tomov   int ierr;
33582b77998SStan Tomov   const CeedScalar *uu;
33682b77998SStan Tomov   CeedScalar *vv;
3373bfcb0deSjeremylt   CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof,
3383bfcb0deSjeremylt           ncomp=r->ncomp;
339c119dad6SStan Tomov   CeedInt esize = nelem * elemsize;
34006b636dbSStan Tomov 
34106b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
342137a0714SStan Tomov   CeedInt *dindices = impl->dindices;
343c119dad6SStan Tomov   // Get pointers on the device
344c119dad6SStan Tomov   ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr);
345c119dad6SStan Tomov   ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr);
34606b636dbSStan Tomov #else
34706b636dbSStan Tomov   CeedInt *indices = impl->indices;
34806b636dbSStan Tomov   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
34906b636dbSStan Tomov   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
35006b636dbSStan Tomov #endif
351c119dad6SStan Tomov 
35282b77998SStan Tomov   if (tmode == CEED_NOTRANSPOSE) {
35382b77998SStan Tomov     // Perform: v = r * u
35482b77998SStan Tomov     if (ncomp == 1) {
35506b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
356c119dad6SStan Tomov magma_template<<i=0:esize>>
357c119dad6SStan Tomov       (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
358c119dad6SStan Tomov         vv[i] = uu[dindices[i]];
359e2900954SStan Tomov       }
36006b636dbSStan Tomov #else
36106b636dbSStan Tomov       for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]];
36206b636dbSStan Tomov #endif
36382b77998SStan Tomov     } else {
36482b77998SStan Tomov       // vv is (elemsize x ncomp x nelem), column-major
36582b77998SStan Tomov       if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
36606b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
36706b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
36806b636dbSStan Tomov         (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) {
36906b636dbSStan Tomov           vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d];
37006b636dbSStan Tomov         }
37106b636dbSStan Tomov #else
372c119dad6SStan Tomov         for (CeedInt e = 0; e < nelem; e++)
37382b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
374c119dad6SStan Tomov             for (CeedInt i=0; i < elemsize; i++) {
375c119dad6SStan Tomov               vv[i + elemsize*(d+ncomp*e)] =
376c119dad6SStan Tomov                 uu[indices[i+elemsize*e]+ndof*d];
377c119dad6SStan Tomov             }
37806b636dbSStan Tomov #endif
37982b77998SStan Tomov       } else { // u is (ncomp x ndof), column-major
38006b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
38106b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
38206b636dbSStan Tomov         (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
38306b636dbSStan Tomov           vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]];
38406b636dbSStan Tomov         }
38506b636dbSStan Tomov #else
386c119dad6SStan Tomov         for (CeedInt e = 0; e < nelem; e++)
38782b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
388c119dad6SStan Tomov             for (CeedInt i=0; i< elemsize; i++) {
389c119dad6SStan Tomov               vv[i + elemsize*(d+ncomp*e)] =
390c119dad6SStan Tomov                 uu[d+ncomp*indices[i+elemsize*e]];
391c119dad6SStan Tomov             }
39206b636dbSStan Tomov #endif
39382b77998SStan Tomov       }
39482b77998SStan Tomov     }
39582b77998SStan Tomov   } else {
39682b77998SStan Tomov     // Note: in transpose mode, we perform: v += r^t * u
39782b77998SStan Tomov     if (ncomp == 1) {
398c119dad6SStan Tomov       // fprintf(stderr,"3 ---------\n");
39906b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
400c119dad6SStan Tomov magma_template<<i=0:esize>>
401c119dad6SStan Tomov       (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
402c119dad6SStan Tomov         magmablas_datomic_add( &vv[dindices[i]], uu[i]);
403c119dad6SStan Tomov       }
40406b636dbSStan Tomov #else
40506b636dbSStan Tomov       for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i];
40606b636dbSStan Tomov #endif
407c119dad6SStan Tomov     } else { // u is (elemsize x ncomp x nelem)
408c119dad6SStan Tomov       fprintf(stderr,"2 ---------\n");
409c119dad6SStan Tomov 
41082b77998SStan Tomov       if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
41106b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
41206b636dbSStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
41306b636dbSStan Tomov         (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) {
41406b636dbSStan Tomov           magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d],
41506b636dbSStan Tomov                                  uu[i+iend*(d+e*dend)]);
41606b636dbSStan Tomov         }
41706b636dbSStan Tomov #else
418c119dad6SStan Tomov         for (CeedInt e = 0; e < nelem; e++)
41982b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
420c119dad6SStan Tomov             for (CeedInt i=0; i < elemsize; i++) {
421c119dad6SStan Tomov               vv[indices[i + elemsize*e]+ndof*d] +=
422c119dad6SStan Tomov                 uu[i + elemsize*(d+e*ncomp)];
423c119dad6SStan Tomov             }
42406b636dbSStan Tomov #endif
42506b636dbSStan Tomov       } else { // vv is (ncomp x ndof), column-major
42606b636dbSStan Tomov #ifdef USE_MAGMA_BATCH2
427c119dad6SStan Tomov magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
42806b636dbSStan Tomov         (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
42906b636dbSStan Tomov           magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]],
430c119dad6SStan Tomov                                  uu[i+iend*(d+e*dend)]);
43182b77998SStan Tomov         }
43206b636dbSStan Tomov #else
433c119dad6SStan Tomov         for (CeedInt e = 0; e < nelem; e++)
43482b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
435c119dad6SStan Tomov             for (CeedInt i=0; i < elemsize; i++) {
436c119dad6SStan Tomov               vv[d+ncomp*indices[i + elemsize*e]] +=
437c119dad6SStan Tomov                 uu[i + elemsize*(d+e*ncomp)];
438c119dad6SStan Tomov             }
43906b636dbSStan Tomov #endif
44082b77998SStan Tomov       }
44182b77998SStan Tomov     }
44282b77998SStan Tomov   }
44306b636dbSStan Tomov 
44482b77998SStan Tomov   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
44582b77998SStan Tomov   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
446c119dad6SStan Tomov 
44782b77998SStan Tomov   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
44882b77998SStan Tomov     *request = NULL;
44982b77998SStan Tomov   return 0;
45082b77998SStan Tomov }
45182b77998SStan Tomov 
45282b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) {
45382b77998SStan Tomov   CeedElemRestriction_Magma *impl = r->data;
45482b77998SStan Tomov   int ierr;
45582b77998SStan Tomov 
4561751b0a9SStan Tomov   // Free if we own the data
4571751b0a9SStan Tomov   if (impl->own_) {
4581751b0a9SStan Tomov     ierr = magma_free_pinned(impl->indices); CeedChk(ierr);
4591751b0a9SStan Tomov     ierr = magma_free(impl->dindices);       CeedChk(ierr);
4601981b6e4STzanio   } else if (impl->down_) {
461c119dad6SStan Tomov     ierr = magma_free(impl->dindices);       CeedChk(ierr);
462c119dad6SStan Tomov   }
46382b77998SStan Tomov   ierr = CeedFree(&r->data); CeedChk(ierr);
46482b77998SStan Tomov   return 0;
46582b77998SStan Tomov }
46682b77998SStan Tomov 
46782b77998SStan Tomov static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r,
46882b77998SStan Tomov     CeedMemType mtype,
46993fbe329SStan Tomov     CeedCopyMode cmode,
47093fbe329SStan Tomov     const CeedInt *indices) {
4711751b0a9SStan Tomov   int ierr, size = r->nelem*r->elemsize;
47282b77998SStan Tomov   CeedElemRestriction_Magma *impl;
47382b77998SStan Tomov 
4741751b0a9SStan Tomov   // Allocate memory for the MAGMA Restricton and initializa pointers to NULL
47582b77998SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
4761751b0a9SStan Tomov   impl->dindices = NULL;
4771751b0a9SStan Tomov   impl->indices  = NULL;
4781751b0a9SStan Tomov   impl->own_ = 0;
479c119dad6SStan Tomov   impl->down_= 0;
4801751b0a9SStan Tomov 
4811751b0a9SStan Tomov   if (mtype == CEED_MEM_HOST) {
4821751b0a9SStan Tomov     // memory is on the host; own_ = 0
48382b77998SStan Tomov     switch (cmode) {
48482b77998SStan Tomov     case CEED_COPY_VALUES:
4851751b0a9SStan Tomov       ierr = magma_malloc( (void**)&impl->dindices,
4861751b0a9SStan Tomov                            size * sizeof(CeedInt)); CeedChk(ierr);
4871751b0a9SStan Tomov       ierr = magma_malloc_pinned( (void**)&impl->indices,
4881751b0a9SStan Tomov                                   size * sizeof(CeedInt)); CeedChk(ierr);
4891751b0a9SStan Tomov       impl->own_ = 1;
4901751b0a9SStan Tomov 
49106b636dbSStan Tomov       if (indices != NULL) {
49206b636dbSStan Tomov         memcpy(impl->indices, indices, size * sizeof(indices[0]));
4931751b0a9SStan Tomov         magma_setvector(size, sizeof(CeedInt),
49406b636dbSStan Tomov                         impl->indices, 1, impl->dindices, 1);
49506b636dbSStan Tomov       }
49682b77998SStan Tomov       break;
49782b77998SStan Tomov     case CEED_OWN_POINTER:
4981751b0a9SStan Tomov       ierr = magma_malloc( (void**)&impl->dindices,
4991751b0a9SStan Tomov                            size * sizeof(CeedInt)); CeedChk(ierr);
5001751b0a9SStan Tomov       // TODO: possible problem here is if we are passed non-pinned memory;
5011751b0a9SStan Tomov       //       (as we own it, lter in destroy, we use free for pinned memory).
502137a0714SStan Tomov       impl->indices = (CeedInt *)indices;
5031751b0a9SStan Tomov       impl->own_ = 1;
5041751b0a9SStan Tomov 
505c119dad6SStan Tomov       if (indices != NULL)
5061751b0a9SStan Tomov         magma_setvector(size, sizeof(CeedInt),
5071751b0a9SStan Tomov                         indices, 1, impl->dindices, 1);
50882b77998SStan Tomov       break;
50982b77998SStan Tomov     case CEED_USE_POINTER:
510c119dad6SStan Tomov       ierr = magma_malloc( (void**)&impl->dindices,
511c119dad6SStan Tomov                            size * sizeof(CeedInt)); CeedChk(ierr);
512c119dad6SStan Tomov       magma_setvector(size, sizeof(CeedInt),
513c119dad6SStan Tomov                       indices, 1, impl->dindices, 1);
514c119dad6SStan Tomov       impl->down_ = 1;
515137a0714SStan Tomov       impl->indices  = (CeedInt *)indices;
51682b77998SStan Tomov     }
5171751b0a9SStan Tomov   } else if (mtype == CEED_MEM_DEVICE) {
5181751b0a9SStan Tomov     // memory is on the device; own = 0
5191751b0a9SStan Tomov     switch (cmode) {
5201751b0a9SStan Tomov     case CEED_COPY_VALUES:
5211751b0a9SStan Tomov       ierr = magma_malloc( (void**)&impl->dindices,
5221751b0a9SStan Tomov                            size * sizeof(CeedInt)); CeedChk(ierr);
5231751b0a9SStan Tomov       ierr = magma_malloc_pinned( (void**)&impl->indices,
5241751b0a9SStan Tomov                                   size * sizeof(CeedInt)); CeedChk(ierr);
5251751b0a9SStan Tomov       impl->own_ = 1;
5261751b0a9SStan Tomov 
5271751b0a9SStan Tomov       if (indices)
5281751b0a9SStan Tomov         magma_copyvector(size, sizeof(CeedInt),
5291751b0a9SStan Tomov                          indices, 1, impl->dindices, 1);
5301751b0a9SStan Tomov       break;
5311751b0a9SStan Tomov     case CEED_OWN_POINTER:
532137a0714SStan Tomov       impl->dindices = (CeedInt *)indices;
5331751b0a9SStan Tomov       ierr = magma_malloc_pinned( (void**)&impl->indices,
5341751b0a9SStan Tomov                                   size * sizeof(CeedInt)); CeedChk(ierr);
5351751b0a9SStan Tomov       impl->own_ = 1;
5361751b0a9SStan Tomov 
5371751b0a9SStan Tomov       break;
5381751b0a9SStan Tomov     case CEED_USE_POINTER:
539137a0714SStan Tomov       impl->dindices = (CeedInt *)indices;
5401751b0a9SStan Tomov       impl->indices  = NULL;
5411751b0a9SStan Tomov     }
5421751b0a9SStan Tomov 
5431751b0a9SStan Tomov   } else
5441751b0a9SStan Tomov     return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported");
5451751b0a9SStan Tomov 
54682b77998SStan Tomov   r->data    = impl;
54782b77998SStan Tomov   r->Apply   = CeedElemRestrictionApply_Magma;
54882b77998SStan Tomov   r->Destroy = CeedElemRestrictionDestroy_Magma;
5491751b0a9SStan Tomov 
55082b77998SStan Tomov   return 0;
55182b77998SStan Tomov }
55282b77998SStan Tomov 
55382b77998SStan Tomov // Contracts on the middle index
55482b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc
55582b77998SStan Tomov // TRANSPOSE:   V_ajc = T_bj U_abc
55682b77998SStan Tomov // If Add != 0, "=" is replaced by "+="
55782b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed,
55882b77998SStan Tomov                                     CeedInt A, CeedInt B, CeedInt C, CeedInt J,
55982b77998SStan Tomov                                     const CeedScalar *t, CeedTransposeMode tmode,
56082b77998SStan Tomov                                     const CeedInt Add,
56182b77998SStan Tomov                                     const CeedScalar *u, CeedScalar *v) {
56238612b08SStan Tomov #ifdef USE_MAGMA_BATCH
56338612b08SStan Tomov   magma_dtensor_contract(ceed, A, B, C, J, t, tmode, Add, u, v);
56438612b08SStan Tomov #else
56582b77998SStan Tomov   CeedInt tstride0 = B, tstride1 = 1;
56682b77998SStan Tomov   if (tmode == CEED_TRANSPOSE) {
56782b77998SStan Tomov     tstride0 = 1; tstride1 = J;
56882b77998SStan Tomov   }
56938612b08SStan Tomov   CeedDebug("\033[31m[CeedTensorContract] A=%d, J=%d, C=%d, B=%d: %d %d %d",
57038612b08SStan Tomov             A,J,C,B,A*J*B*C, C*J*A, C*B*A);
57182b77998SStan Tomov   for (CeedInt a=0; a<A; a++) {
57282b77998SStan Tomov     for (CeedInt j=0; j<J; j++) {
57382b77998SStan Tomov       if (!Add) {
57482b77998SStan Tomov         for (CeedInt c=0; c<C; c++)
57582b77998SStan Tomov           v[(a*J+j)*C+c] = 0;
57682b77998SStan Tomov       }
57782b77998SStan Tomov       for (CeedInt b=0; b<B; b++) {
57882b77998SStan Tomov         for (CeedInt c=0; c<C; c++) {
57982b77998SStan Tomov           v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c];
58082b77998SStan Tomov         }
58182b77998SStan Tomov       }
58282b77998SStan Tomov     }
58382b77998SStan Tomov   }
58438612b08SStan Tomov #endif
58582b77998SStan Tomov   return 0;
58682b77998SStan Tomov }
58782b77998SStan Tomov 
588d3181881Sjeremylt static int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
589d3181881Sjeremylt                                 CeedTransposeMode tmode, CeedEvalMode emode,
59082b77998SStan Tomov                                 const CeedScalar *u, CeedScalar *v) {
59182b77998SStan Tomov   int ierr;
59282b77998SStan Tomov   const CeedInt dim = basis->dim;
5930f5de9e9Sjeremylt   const CeedInt ncomp = basis->ncomp;
5940f5de9e9Sjeremylt   const CeedInt nqpt = ncomp*CeedPowInt(basis->Q1d, dim);
59582b77998SStan Tomov   const CeedInt add = (tmode == CEED_TRANSPOSE);
59682b77998SStan Tomov 
597*c44a85f4Sjeremylt   if (nelem != 1)
598*c44a85f4Sjeremylt     return CeedError(basis->ceed, 1,
599*c44a85f4Sjeremylt                      "This backend does not support BasisApply for multiple elements");
600*c44a85f4Sjeremylt 
60138612b08SStan Tomov   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d",
6020f5de9e9Sjeremylt             ncomp*CeedPowInt(basis->P1d, dim));
60338612b08SStan Tomov 
60482b77998SStan Tomov   if (tmode == CEED_TRANSPOSE) {
6050f5de9e9Sjeremylt     const CeedInt vsize = ncomp*CeedPowInt(basis->P1d, dim);
60682b77998SStan Tomov     for (CeedInt i = 0; i < vsize; i++)
60782b77998SStan Tomov       v[i] = (CeedScalar) 0;
60882b77998SStan Tomov   }
60982b77998SStan Tomov   if (emode & CEED_EVAL_INTERP) {
61082b77998SStan Tomov     CeedInt P = basis->P1d, Q = basis->Q1d;
61182b77998SStan Tomov     if (tmode == CEED_TRANSPOSE) {
61282b77998SStan Tomov       P = basis->Q1d; Q = basis->P1d;
61382b77998SStan Tomov     }
6140f5de9e9Sjeremylt     CeedInt pre = ncomp*CeedPowInt(P, dim-1), post = 1;
6150f5de9e9Sjeremylt     CeedScalar tmp[2][ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)];
61638612b08SStan Tomov     CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d",
6170f5de9e9Sjeremylt               ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1));
61882b77998SStan Tomov     for (CeedInt d=0; d<dim; d++) {
61982b77998SStan Tomov       ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d,
62082b77998SStan Tomov                                       tmode, add&&(d==dim-1),
62182b77998SStan Tomov                                       d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
62282b77998SStan Tomov       CeedChk(ierr);
62382b77998SStan Tomov       pre /= P;
62482b77998SStan Tomov       post *= Q;
62582b77998SStan Tomov     }
62682b77998SStan Tomov     if (tmode == CEED_NOTRANSPOSE) {
62782b77998SStan Tomov       v += nqpt;
62882b77998SStan Tomov     } else {
62982b77998SStan Tomov       u += nqpt;
63082b77998SStan Tomov     }
63182b77998SStan Tomov   }
63282b77998SStan Tomov   if (emode & CEED_EVAL_GRAD) {
63382b77998SStan Tomov     CeedInt P = basis->P1d, Q = basis->Q1d;
63482b77998SStan Tomov     // In CEED_NOTRANSPOSE mode:
6350f5de9e9Sjeremylt     // u is (P^dim x nc), column-major layout (nc = ncomp)
6360f5de9e9Sjeremylt     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
63782b77998SStan Tomov     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
63882b77998SStan Tomov     if (tmode == CEED_TRANSPOSE) {
63982b77998SStan Tomov       P = basis->Q1d, Q = basis->P1d;
64082b77998SStan Tomov     }
6410f5de9e9Sjeremylt     CeedScalar tmp[2][ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1)];
64238612b08SStan Tomov     CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d",
6430f5de9e9Sjeremylt               ncomp*Q*CeedPowInt(P>Q?P:Q, dim-1));
64482b77998SStan Tomov     for (CeedInt p = 0; p < dim; p++) {
6450f5de9e9Sjeremylt       CeedInt pre = ncomp*CeedPowInt(P, dim-1), post = 1;
64682b77998SStan Tomov       for (CeedInt d=0; d<dim; d++) {
64782b77998SStan Tomov         ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q,
64882b77998SStan Tomov                                         (p==d)?basis->grad1d:basis->interp1d,
64982b77998SStan Tomov                                         tmode, add&&(d==dim-1),
65082b77998SStan Tomov                                         d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
65182b77998SStan Tomov         CeedChk(ierr);
65282b77998SStan Tomov         pre /= P;
65382b77998SStan Tomov         post *= Q;
65482b77998SStan Tomov       }
65582b77998SStan Tomov       if (tmode == CEED_NOTRANSPOSE) {
65682b77998SStan Tomov         v += nqpt;
65782b77998SStan Tomov       } else {
65882b77998SStan Tomov         u += nqpt;
65982b77998SStan Tomov       }
66082b77998SStan Tomov     }
66182b77998SStan Tomov   }
66282b77998SStan Tomov   if (emode & CEED_EVAL_WEIGHT) {
66382b77998SStan Tomov     if (tmode == CEED_TRANSPOSE)
66482b77998SStan Tomov       return CeedError(basis->ceed, 1,
66582b77998SStan Tomov                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
66682b77998SStan Tomov     CeedInt Q = basis->Q1d;
66782b77998SStan Tomov     for (CeedInt d=0; d<dim; d++) {
66882b77998SStan Tomov       CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d);
66982b77998SStan Tomov       for (CeedInt i=0; i<pre; i++) {
67082b77998SStan Tomov         for (CeedInt j=0; j<Q; j++) {
67182b77998SStan Tomov           for (CeedInt k=0; k<post; k++) {
67282b77998SStan Tomov             v[(i*Q + j)*post + k] = basis->qweight1d[j]
67382b77998SStan Tomov                                     * (d == 0 ? 1 : v[(i*Q + j)*post + k]);
67482b77998SStan Tomov           }
67582b77998SStan Tomov         }
67682b77998SStan Tomov       }
67782b77998SStan Tomov     }
67882b77998SStan Tomov   }
67982b77998SStan Tomov   return 0;
68082b77998SStan Tomov }
68182b77998SStan Tomov 
68282b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) {
68382b77998SStan Tomov   return 0;
68482b77998SStan Tomov }
68582b77998SStan Tomov 
68682b77998SStan Tomov static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d,
68782b77998SStan Tomov     CeedInt Q1d, const CeedScalar *interp1d,
68882b77998SStan Tomov     const CeedScalar *grad1d,
68982b77998SStan Tomov     const CeedScalar *qref1d,
69082b77998SStan Tomov     const CeedScalar *qweight1d,
69182b77998SStan Tomov     CeedBasis basis) {
69282b77998SStan Tomov   basis->Apply = CeedBasisApply_Magma;
69382b77998SStan Tomov   basis->Destroy = CeedBasisDestroy_Magma;
69482b77998SStan Tomov   return 0;
69582b77998SStan Tomov }
69682b77998SStan Tomov 
6973bfcb0deSjeremylt static int CeedQFunctionApply_Magma(CeedQFunction qf, CeedInt Q,
69882b77998SStan Tomov                                     const CeedScalar *const *u,
69982b77998SStan Tomov                                     CeedScalar *const *v) {
70082b77998SStan Tomov   int ierr;
7013bfcb0deSjeremylt   ierr = qf->function(qf->ctx, Q, u, v); CeedChk(ierr);
70282b77998SStan Tomov   return 0;
70382b77998SStan Tomov }
70482b77998SStan Tomov 
70582b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) {
70682b77998SStan Tomov   return 0;
70782b77998SStan Tomov }
70882b77998SStan Tomov 
70982b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) {
71082b77998SStan Tomov   qf->Apply = CeedQFunctionApply_Magma;
71182b77998SStan Tomov   qf->Destroy = CeedQFunctionDestroy_Magma;
71282b77998SStan Tomov   return 0;
71382b77998SStan Tomov }
71482b77998SStan Tomov 
71582b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) {
71682b77998SStan Tomov   CeedOperator_Magma *impl = op->data;
71782b77998SStan Tomov   int ierr;
71882b77998SStan Tomov 
7193bfcb0deSjeremylt   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
7203bfcb0deSjeremylt     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
7213bfcb0deSjeremylt   }
7223bfcb0deSjeremylt   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
7233bfcb0deSjeremylt   ierr = CeedFree(&impl->edata); CeedChk(ierr);
7243bfcb0deSjeremylt 
7253bfcb0deSjeremylt   for (CeedInt i=0; i<impl->numqin+impl->numqout; i++) {
7263bfcb0deSjeremylt     ierr = CeedFree(&impl->qdata_alloc[i]); CeedChk(ierr);
7273bfcb0deSjeremylt   }
7283bfcb0deSjeremylt   ierr = CeedFree(&impl->qdata_alloc); CeedChk(ierr);
7293bfcb0deSjeremylt   ierr = CeedFree(&impl->qdata); CeedChk(ierr);
7303bfcb0deSjeremylt 
7313bfcb0deSjeremylt   ierr = CeedFree(&impl->indata); CeedChk(ierr);
7323bfcb0deSjeremylt   ierr = CeedFree(&impl->outdata); CeedChk(ierr);
7333bfcb0deSjeremylt 
73482b77998SStan Tomov   ierr = CeedFree(&op->data); CeedChk(ierr);
73582b77998SStan Tomov   return 0;
73682b77998SStan Tomov }
73782b77998SStan Tomov 
73808ed4d02Sjeremylt 
7393bfcb0deSjeremylt /*
7403bfcb0deSjeremylt   Setup infields or outfields
7413bfcb0deSjeremylt  */
7423bfcb0deSjeremylt static int CeedOperatorSetupFields_Magma(struct CeedQFunctionField qfields[16],
7433bfcb0deSjeremylt                                        struct CeedOperatorField ofields[16],
7443bfcb0deSjeremylt                                        CeedVector *evecs, CeedScalar **qdata,
7453bfcb0deSjeremylt                                        CeedScalar **qdata_alloc, CeedScalar **indata,
7463bfcb0deSjeremylt                                        CeedInt starti, CeedInt starte,
7473bfcb0deSjeremylt                                        CeedInt startq, CeedInt numfields, CeedInt Q) {
7483bfcb0deSjeremylt   CeedInt dim, ierr, ie=starte, iq=startq, ncomp;
74982b77998SStan Tomov 
7503bfcb0deSjeremylt   // Loop over fields
7513bfcb0deSjeremylt   for (CeedInt i=0; i<numfields; i++) {
752668048e2SJed Brown     if (ofields[i].Erestrict != CEED_RESTRICTION_IDENTITY) {
7533bfcb0deSjeremylt       ierr = CeedElemRestrictionCreateVector(ofields[i].Erestrict, NULL, &evecs[ie]);
75482b77998SStan Tomov       CeedChk(ierr);
7553bfcb0deSjeremylt       ie++;
75682b77998SStan Tomov     }
7573bfcb0deSjeremylt     CeedEvalMode emode = qfields[i].emode;
7583bfcb0deSjeremylt     switch(emode) {
7593bfcb0deSjeremylt     case CEED_EVAL_NONE:
7603bfcb0deSjeremylt       break; // No action
7613bfcb0deSjeremylt     case CEED_EVAL_INTERP:
7623bfcb0deSjeremylt       ncomp = qfields[i].ncomp;
7633bfcb0deSjeremylt       ierr = CeedMalloc(Q*ncomp, &qdata_alloc[iq]); CeedChk(ierr);
7643bfcb0deSjeremylt       qdata[i + starti] = qdata_alloc[iq];
7653bfcb0deSjeremylt       iq++;
7663bfcb0deSjeremylt       break;
7673bfcb0deSjeremylt     case CEED_EVAL_GRAD:
7683bfcb0deSjeremylt       ncomp = qfields[i].ncomp;
7693bfcb0deSjeremylt       dim = ofields[i].basis->dim;
7703bfcb0deSjeremylt       ierr = CeedMalloc(Q*ncomp*dim, &qdata_alloc[iq]); CeedChk(ierr);
7713bfcb0deSjeremylt       qdata[i + starti] = qdata_alloc[iq];
7723bfcb0deSjeremylt       iq++;
7733bfcb0deSjeremylt       break;
7743bfcb0deSjeremylt     case CEED_EVAL_WEIGHT: // Only on input fields
7753bfcb0deSjeremylt       ierr = CeedMalloc(Q, &qdata_alloc[iq]); CeedChk(ierr);
776d3181881Sjeremylt       ierr = CeedBasisApply(ofields[iq].basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
7773bfcb0deSjeremylt                             NULL, qdata_alloc[iq]); CeedChk(ierr);
7783bfcb0deSjeremylt       qdata[i] = qdata_alloc[iq];
7793bfcb0deSjeremylt       indata[i] = qdata[i];
7803bfcb0deSjeremylt       iq++;
7813bfcb0deSjeremylt       break;
7823bfcb0deSjeremylt     case CEED_EVAL_DIV:
7833bfcb0deSjeremylt       break; // Not implimented
7843bfcb0deSjeremylt     case CEED_EVAL_CURL:
7853bfcb0deSjeremylt       break; // Not implimented
78682b77998SStan Tomov     }
7873bfcb0deSjeremylt   }
78882b77998SStan Tomov   return 0;
78982b77998SStan Tomov }
79082b77998SStan Tomov 
7913bfcb0deSjeremylt /*
7923bfcb0deSjeremylt   CeedOperator needs to connect all the named fields (be they active or passive)
7933bfcb0deSjeremylt   to the named inputs and outputs of its CeedQFunction.
7943bfcb0deSjeremylt  */
7953bfcb0deSjeremylt static int CeedOperatorSetup_Magma(CeedOperator op) {
7963bfcb0deSjeremylt   if (op->setupdone) return 0;
7976a35ea15Sjeremylt   CeedOperator_Magma *opmagma = op->data;
7983bfcb0deSjeremylt   CeedQFunction qf = op->qf;
7993bfcb0deSjeremylt   CeedInt Q = op->numqpoints;
80082b77998SStan Tomov   int ierr;
80182b77998SStan Tomov 
8023bfcb0deSjeremylt   // Count infield and outfield array sizes and evectors
8033bfcb0deSjeremylt   for (CeedInt i=0; i<qf->numinputfields; i++) {
8043bfcb0deSjeremylt     CeedEvalMode emode = qf->inputfields[i].emode;
805668048e2SJed Brown     opmagma->numqin += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD) + !!
8063bfcb0deSjeremylt                      (emode & CEED_EVAL_WEIGHT);
8076a35ea15Sjeremylt     opmagma->numein +=
808cb661df1Sjeremylt       (op->inputfields[i].Erestrict != CEED_RESTRICTION_IDENTITY); // Need E-vector when non-identity restriction exists
80982b77998SStan Tomov   }
8103bfcb0deSjeremylt   for (CeedInt i=0; i<qf->numoutputfields; i++) {
8113bfcb0deSjeremylt     CeedEvalMode emode = qf->outputfields[i].emode;
8126a35ea15Sjeremylt     opmagma->numqout += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD);
813cb661df1Sjeremylt     opmagma->numeout += (op->outputfields[i].Erestrict != CEED_RESTRICTION_IDENTITY);
8143bfcb0deSjeremylt   }
8153bfcb0deSjeremylt 
8163bfcb0deSjeremylt   // Allocate
817668048e2SJed Brown   ierr = CeedCalloc(opmagma->numein + opmagma->numeout, &opmagma->evecs); CeedChk(ierr);
8186a35ea15Sjeremylt   ierr = CeedCalloc(qf->numinputfields + qf->numoutputfields, &opmagma->edata);
8193bfcb0deSjeremylt   CeedChk(ierr);
8203bfcb0deSjeremylt 
8216a35ea15Sjeremylt   ierr = CeedCalloc(opmagma->numqin + opmagma->numqout, &opmagma->qdata_alloc);
8223bfcb0deSjeremylt   CeedChk(ierr);
8236a35ea15Sjeremylt   ierr = CeedCalloc(qf->numinputfields + qf->numoutputfields, &opmagma->qdata);
8243bfcb0deSjeremylt   CeedChk(ierr);
8253bfcb0deSjeremylt 
8266a35ea15Sjeremylt   ierr = CeedCalloc(16, &opmagma->indata); CeedChk(ierr);
8276a35ea15Sjeremylt   ierr = CeedCalloc(16, &opmagma->outdata); CeedChk(ierr);
8283bfcb0deSjeremylt 
8293bfcb0deSjeremylt   // Set up infield and outfield pointer arrays
8303bfcb0deSjeremylt   // Infields
8313bfcb0deSjeremylt   ierr = CeedOperatorSetupFields_Magma(qf->inputfields, op->inputfields,
8326a35ea15Sjeremylt                                      opmagma->evecs, opmagma->qdata, opmagma->qdata_alloc,
8336a35ea15Sjeremylt                                      opmagma->indata, 0, 0, 0,
8343bfcb0deSjeremylt                                      qf->numinputfields, Q); CeedChk(ierr);
8353bfcb0deSjeremylt 
8363bfcb0deSjeremylt   // Outfields
8373bfcb0deSjeremylt   ierr = CeedOperatorSetupFields_Magma(qf->outputfields, op->outputfields,
8386a35ea15Sjeremylt                                      opmagma->evecs, opmagma->qdata, opmagma->qdata_alloc,
8396a35ea15Sjeremylt                                      opmagma->indata, qf->numinputfields, opmagma->numein,
8406a35ea15Sjeremylt                                      opmagma->numqin, qf->numoutputfields, Q); CeedChk(ierr);
8413bfcb0deSjeremylt 
84208ed4d02Sjeremylt   // Output Qvecs
84308ed4d02Sjeremylt   for (CeedInt i=0; i<qf->numoutputfields; i++) {
84408ed4d02Sjeremylt     CeedEvalMode emode = qf->outputfields[i].emode;
84508ed4d02Sjeremylt     if (emode != CEED_EVAL_NONE) {
84608ed4d02Sjeremylt       opmagma->outdata[i] =  opmagma->qdata[i + qf->numinputfields];
84708ed4d02Sjeremylt     }
84808ed4d02Sjeremylt   }
84908ed4d02Sjeremylt 
8503bfcb0deSjeremylt   op->setupdone = 1;
8513bfcb0deSjeremylt 
8523bfcb0deSjeremylt   return 0;
8533bfcb0deSjeremylt }
8543bfcb0deSjeremylt 
85568e7ed8aSjeremylt static int CeedOperatorApply_Magma(CeedOperator op, CeedVector invec,
8563bfcb0deSjeremylt                                  CeedVector outvec, CeedRequest *request) {
85768e7ed8aSjeremylt   CeedOperator_Magma *opmagma = op->data;
8583bfcb0deSjeremylt   CeedInt Q = op->numqpoints, elemsize;
8593bfcb0deSjeremylt   int ierr;
8603bfcb0deSjeremylt   CeedQFunction qf = op->qf;
8613bfcb0deSjeremylt   CeedTransposeMode lmode = CEED_NOTRANSPOSE;
8626a35ea15Sjeremylt   CeedScalar *vec_temp;
8633bfcb0deSjeremylt 
8643bfcb0deSjeremylt   // Setup
86568e7ed8aSjeremylt   ierr = CeedOperatorSetup_Magma(op); CeedChk(ierr);
8663bfcb0deSjeremylt 
8673bfcb0deSjeremylt   // Input Evecs and Restriction
8683bfcb0deSjeremylt   for (CeedInt i=0,iein=0; i<qf->numinputfields; i++) {
869668048e2SJed Brown     // No Restriction
870668048e2SJed Brown     if (op->inputfields[i].Erestrict == CEED_RESTRICTION_IDENTITY) {
871668048e2SJed Brown       CeedEvalMode emode = qf->inputfields[i].emode;
872668048e2SJed Brown       if (emode & CEED_EVAL_WEIGHT) {
873668048e2SJed Brown       } else {
874668048e2SJed Brown         // Active
875668048e2SJed Brown         if (op->inputfields[i].vec == CEED_VECTOR_ACTIVE) {
876668048e2SJed Brown           ierr = CeedVectorGetArrayRead(invec, CEED_MEM_HOST,
877668048e2SJed Brown                                         (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr);
878668048e2SJed Brown           // Passive
879668048e2SJed Brown         } else {
880668048e2SJed Brown           ierr = CeedVectorGetArrayRead(op->inputfields[i].vec, CEED_MEM_HOST,
881668048e2SJed Brown                                         (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr);
882668048e2SJed Brown         }
883668048e2SJed Brown       }
884668048e2SJed Brown     } else {
8853bfcb0deSjeremylt       // Restriction
8866a35ea15Sjeremylt       // Zero evec
8870f5de9e9Sjeremylt       ierr = CeedVectorGetArray(opmagma->evecs[iein], CEED_MEM_HOST, &vec_temp);
8880f5de9e9Sjeremylt       CeedChk(ierr);
8896a35ea15Sjeremylt       for (CeedInt j=0; j<opmagma->evecs[iein]->length; j++)
8906a35ea15Sjeremylt         vec_temp[j] = 0.;
8916a35ea15Sjeremylt       ierr = CeedVectorRestoreArray(opmagma->evecs[iein], &vec_temp); CeedChk(ierr);
892668048e2SJed Brown       // Active
893668048e2SJed Brown       if (op->inputfields[i].vec == CEED_VECTOR_ACTIVE) {
894668048e2SJed Brown         // Restrict
895668048e2SJed Brown         ierr = CeedElemRestrictionApply(op->inputfields[i].Erestrict, CEED_NOTRANSPOSE,
896668048e2SJed Brown                                         lmode, invec, opmagma->evecs[iein],
897668048e2SJed Brown                                         request); CeedChk(ierr);
898668048e2SJed Brown         // Get evec
899668048e2SJed Brown         ierr = CeedVectorGetArrayRead(opmagma->evecs[iein], CEED_MEM_HOST,
900668048e2SJed Brown                                       (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr);
901668048e2SJed Brown         iein++;
902668048e2SJed Brown       } else {
9033bfcb0deSjeremylt         // Passive
9046a35ea15Sjeremylt         // Restrict
9053bfcb0deSjeremylt         ierr = CeedElemRestrictionApply(op->inputfields[i].Erestrict, CEED_NOTRANSPOSE,
9066a35ea15Sjeremylt                                         lmode, op->inputfields[i].vec, opmagma->evecs[iein],
9073bfcb0deSjeremylt                                         request); CeedChk(ierr);
9086a35ea15Sjeremylt         // Get evec
9096a35ea15Sjeremylt         ierr = CeedVectorGetArrayRead(opmagma->evecs[iein], CEED_MEM_HOST,
9106a35ea15Sjeremylt                                       (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr);
9113bfcb0deSjeremylt         iein++;
9123bfcb0deSjeremylt       }
9133bfcb0deSjeremylt     }
9143bfcb0deSjeremylt   }
9153bfcb0deSjeremylt 
9163bfcb0deSjeremylt   // Output Evecs
9176a35ea15Sjeremylt   for (CeedInt i=0,ieout=opmagma->numein; i<qf->numoutputfields; i++) {
918668048e2SJed Brown     // No Restriction
919668048e2SJed Brown     if (op->outputfields[i].Erestrict == CEED_RESTRICTION_IDENTITY) {
920668048e2SJed Brown       // Active
921668048e2SJed Brown       if (op->outputfields[i].vec == CEED_VECTOR_ACTIVE) {
922668048e2SJed Brown         ierr = CeedVectorGetArray(outvec, CEED_MEM_HOST,
923668048e2SJed Brown                                   &opmagma->edata[i + qf->numinputfields]); CeedChk(ierr);
924668048e2SJed Brown       } else {
925668048e2SJed Brown         // Passive
926668048e2SJed Brown         ierr = CeedVectorGetArray(op->outputfields[i].vec, CEED_MEM_HOST,
927668048e2SJed Brown                                   &opmagma->edata[i + qf->numinputfields]); CeedChk(ierr);
928668048e2SJed Brown       }
929668048e2SJed Brown     } else {
9303bfcb0deSjeremylt       // Restriction
9316a35ea15Sjeremylt       ierr = CeedVectorGetArray(opmagma->evecs[ieout], CEED_MEM_HOST,
9326a35ea15Sjeremylt                                 &opmagma->edata[i + qf->numinputfields]); CeedChk(ierr);
9333bfcb0deSjeremylt       ieout++;
9343bfcb0deSjeremylt     }
9353bfcb0deSjeremylt   }
9363bfcb0deSjeremylt 
9373bfcb0deSjeremylt   // Loop through elements
9383bfcb0deSjeremylt   for (CeedInt e=0; e<op->numelements; e++) {
9393bfcb0deSjeremylt     // Input basis apply if needed
9403bfcb0deSjeremylt     for (CeedInt i=0; i<qf->numinputfields; i++) {
9413bfcb0deSjeremylt       // Get elemsize
942668048e2SJed Brown       if (op->inputfields[i].Erestrict != CEED_RESTRICTION_IDENTITY) {
9433bfcb0deSjeremylt         elemsize = op->inputfields[i].Erestrict->elemsize;
9443bfcb0deSjeremylt       } else {
9453bfcb0deSjeremylt         elemsize = Q;
9463bfcb0deSjeremylt       }
9473bfcb0deSjeremylt       // Get emode, ncomp
9483bfcb0deSjeremylt       CeedEvalMode emode = qf->inputfields[i].emode;
9493bfcb0deSjeremylt       CeedInt ncomp = qf->inputfields[i].ncomp;
9503bfcb0deSjeremylt       // Basis action
9513bfcb0deSjeremylt       switch(emode) {
9523bfcb0deSjeremylt       case CEED_EVAL_NONE:
9536a35ea15Sjeremylt         opmagma->indata[i] = &opmagma->edata[i][e*Q*ncomp];
9543bfcb0deSjeremylt         break;
9553bfcb0deSjeremylt       case CEED_EVAL_INTERP:
956d3181881Sjeremylt         ierr = CeedBasisApply(op->inputfields[i].basis, 1, CEED_NOTRANSPOSE,
9576a35ea15Sjeremylt                               CEED_EVAL_INTERP, &opmagma->edata[i][e*elemsize*ncomp], opmagma->qdata[i]);
9583bfcb0deSjeremylt         CeedChk(ierr);
9596a35ea15Sjeremylt         opmagma->indata[i] = opmagma->qdata[i];
9603bfcb0deSjeremylt         break;
9613bfcb0deSjeremylt       case CEED_EVAL_GRAD:
962d3181881Sjeremylt         ierr = CeedBasisApply(op->inputfields[i].basis, 1, CEED_NOTRANSPOSE,
9636a35ea15Sjeremylt                               CEED_EVAL_GRAD, &opmagma->edata[i][e*elemsize*ncomp], opmagma->qdata[i]);
9643bfcb0deSjeremylt         CeedChk(ierr);
9656a35ea15Sjeremylt         opmagma->indata[i] = opmagma->qdata[i];
9663bfcb0deSjeremylt         break;
9673bfcb0deSjeremylt       case CEED_EVAL_WEIGHT:
9683bfcb0deSjeremylt         break;  // No action
9693bfcb0deSjeremylt       case CEED_EVAL_DIV:
9703bfcb0deSjeremylt         break; // Not implimented
9713bfcb0deSjeremylt       case CEED_EVAL_CURL:
9723bfcb0deSjeremylt         break; // Not implimented
9733bfcb0deSjeremylt       }
9743bfcb0deSjeremylt     }
9753bfcb0deSjeremylt     // Output pointers
9763bfcb0deSjeremylt     for (CeedInt i=0; i<qf->numoutputfields; i++) {
9773bfcb0deSjeremylt       CeedEvalMode emode = qf->outputfields[i].emode;
9783bfcb0deSjeremylt       if (emode == CEED_EVAL_NONE) {
9793bfcb0deSjeremylt         CeedInt ncomp = qf->outputfields[i].ncomp;
9806a35ea15Sjeremylt         opmagma->outdata[i] = &opmagma->edata[i + qf->numinputfields][e*Q*ncomp];
9813bfcb0deSjeremylt       }
9823bfcb0deSjeremylt     }
9833bfcb0deSjeremylt     // Q function
984668048e2SJed Brown     ierr = CeedQFunctionApply(op->qf, Q, (const CeedScalar * const*) opmagma->indata,
9856a35ea15Sjeremylt                               opmagma->outdata); CeedChk(ierr);
9863bfcb0deSjeremylt 
9873bfcb0deSjeremylt     // Output basis apply if needed
9883bfcb0deSjeremylt     for (CeedInt i=0; i<qf->numoutputfields; i++) {
9893bfcb0deSjeremylt       // Get elemsize
990668048e2SJed Brown       if (op->outputfields[i].Erestrict != CEED_RESTRICTION_IDENTITY) {
9913bfcb0deSjeremylt         elemsize = op->outputfields[i].Erestrict->elemsize;
9923bfcb0deSjeremylt       } else {
9933bfcb0deSjeremylt         elemsize = Q;
9943bfcb0deSjeremylt       }
9953bfcb0deSjeremylt       // Get emode, ncomp
9963bfcb0deSjeremylt       CeedInt ncomp = qf->outputfields[i].ncomp;
9973bfcb0deSjeremylt       CeedEvalMode emode = qf->outputfields[i].emode;
9983bfcb0deSjeremylt       // Basis action
9993bfcb0deSjeremylt       switch(emode) {
10003bfcb0deSjeremylt       case CEED_EVAL_NONE:
10013bfcb0deSjeremylt         break; // No action
10023bfcb0deSjeremylt       case CEED_EVAL_INTERP:
1003d3181881Sjeremylt         ierr = CeedBasisApply(op->outputfields[i].basis, 1, CEED_TRANSPOSE,
10046a35ea15Sjeremylt                               CEED_EVAL_INTERP, opmagma->outdata[i],
10056a35ea15Sjeremylt                               &opmagma->edata[i + qf->numinputfields][e*elemsize*ncomp]); CeedChk(ierr);
10063bfcb0deSjeremylt         break;
10073bfcb0deSjeremylt       case CEED_EVAL_GRAD:
1008d3181881Sjeremylt         ierr = CeedBasisApply(op->outputfields[i].basis, 1, CEED_TRANSPOSE, CEED_EVAL_GRAD,
10096a35ea15Sjeremylt                               opmagma->outdata[i], &opmagma->edata[i + qf->numinputfields][e*elemsize*ncomp]);
10103bfcb0deSjeremylt         CeedChk(ierr);
10113bfcb0deSjeremylt         break;
10123bfcb0deSjeremylt       case CEED_EVAL_WEIGHT:
10133bfcb0deSjeremylt         break; // Should not occur
10143bfcb0deSjeremylt       case CEED_EVAL_DIV:
10153bfcb0deSjeremylt         break; // Not implimented
10163bfcb0deSjeremylt       case CEED_EVAL_CURL:
10173bfcb0deSjeremylt         break; // Not implimented
10183bfcb0deSjeremylt       }
10193bfcb0deSjeremylt     }
10203bfcb0deSjeremylt   }
10213bfcb0deSjeremylt 
10223bfcb0deSjeremylt   // Output restriction
10236a35ea15Sjeremylt   for (CeedInt i=0,ieout=opmagma->numein; i<qf->numoutputfields; i++) {
1024668048e2SJed Brown     // No Restriction
1025668048e2SJed Brown     if (op->outputfields[i].Erestrict == CEED_RESTRICTION_IDENTITY) {
10263bfcb0deSjeremylt       // Active
1027668048e2SJed Brown       if (op->outputfields[i].vec == CEED_VECTOR_ACTIVE) {
1028668048e2SJed Brown         ierr = CeedVectorRestoreArray(outvec, &opmagma->edata[i + qf->numinputfields]);
1029668048e2SJed Brown         CeedChk(ierr);
1030668048e2SJed Brown       } else {
1031668048e2SJed Brown         // Passive
1032668048e2SJed Brown         ierr = CeedVectorRestoreArray(op->outputfields[i].vec,
1033668048e2SJed Brown                                       &opmagma->edata[i + qf->numinputfields]); CeedChk(ierr);
1034668048e2SJed Brown       }
1035668048e2SJed Brown     } else {
1036668048e2SJed Brown       // Restriction
1037668048e2SJed Brown       // Active
1038668048e2SJed Brown       if (op->outputfields[i].vec == CEED_VECTOR_ACTIVE) {
10396a35ea15Sjeremylt         // Restore evec
10406a35ea15Sjeremylt         ierr = CeedVectorRestoreArray(opmagma->evecs[ieout],
10416a35ea15Sjeremylt                                       &opmagma->edata[i + qf->numinputfields]); CeedChk(ierr);
10426a35ea15Sjeremylt         // Zero lvec
10436a35ea15Sjeremylt         ierr = CeedVectorGetArray(outvec, CEED_MEM_HOST, &vec_temp); CeedChk(ierr);
10446a35ea15Sjeremylt         for (CeedInt j=0; j<outvec->length; j++)
10456a35ea15Sjeremylt           vec_temp[j] = 0.;
10466a35ea15Sjeremylt         ierr = CeedVectorRestoreArray(outvec, &vec_temp); CeedChk(ierr);
10476a35ea15Sjeremylt         // Restrict
10483bfcb0deSjeremylt         ierr = CeedElemRestrictionApply(op->outputfields[i].Erestrict, CEED_TRANSPOSE,
10496a35ea15Sjeremylt                                         lmode, opmagma->evecs[ieout], outvec, request); CeedChk(ierr);
10503bfcb0deSjeremylt         ieout++;
10513bfcb0deSjeremylt       } else {
10523bfcb0deSjeremylt         // Passive
1053668048e2SJed Brown         // Restore evec
1054668048e2SJed Brown         ierr = CeedVectorRestoreArray(opmagma->evecs[ieout],
10556a35ea15Sjeremylt                                       &opmagma->edata[i + qf->numinputfields]); CeedChk(ierr);
1056668048e2SJed Brown         // Zero lvec
1057668048e2SJed Brown         ierr = CeedVectorGetArray(op->outputfields[i].vec, CEED_MEM_HOST, &vec_temp);
10583bfcb0deSjeremylt         CeedChk(ierr);
1059668048e2SJed Brown         for (CeedInt j=0; j<op->outputfields[i].vec->length; j++)
1060668048e2SJed Brown           vec_temp[j] = 0.;
1061668048e2SJed Brown         ierr = CeedVectorRestoreArray(op->outputfields[i].vec, &vec_temp);
1062668048e2SJed Brown         CeedChk(ierr);
1063668048e2SJed Brown         // Restrict
1064668048e2SJed Brown         ierr = CeedElemRestrictionApply(op->outputfields[i].Erestrict, CEED_TRANSPOSE,
1065668048e2SJed Brown                                         lmode, opmagma->evecs[ieout], op->outputfields[i].vec,
1066668048e2SJed Brown                                         request); CeedChk(ierr);
1067668048e2SJed Brown         ieout++;
10683bfcb0deSjeremylt       }
10693bfcb0deSjeremylt     }
10703bfcb0deSjeremylt   }
10713bfcb0deSjeremylt 
10726a35ea15Sjeremylt   // Restore input arrays
10736a35ea15Sjeremylt   for (CeedInt i=0,iein=0; i<qf->numinputfields; i++) {
1074668048e2SJed Brown     // No Restriction
1075668048e2SJed Brown     if (op->inputfields[i].Erestrict == CEED_RESTRICTION_IDENTITY) {
10766a35ea15Sjeremylt       CeedEvalMode emode = qf->inputfields[i].emode;
10776a35ea15Sjeremylt       if (emode & CEED_EVAL_WEIGHT) {
10786a35ea15Sjeremylt       } else {
10796a35ea15Sjeremylt         // Active
1080668048e2SJed Brown         if (op->inputfields[i].vec == CEED_VECTOR_ACTIVE) {
10816a35ea15Sjeremylt           ierr = CeedVectorRestoreArrayRead(invec,
10826a35ea15Sjeremylt                                             (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr);
1083668048e2SJed Brown           // Passive
1084668048e2SJed Brown         } else {
1085668048e2SJed Brown           ierr = CeedVectorRestoreArrayRead(op->inputfields[i].vec,
1086668048e2SJed Brown                                             (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr);
10876a35ea15Sjeremylt         }
10886a35ea15Sjeremylt       }
1089668048e2SJed Brown     } else {
1090668048e2SJed Brown       // Restriction
1091668048e2SJed Brown       ierr = CeedVectorRestoreArrayRead(opmagma->evecs[iein],
1092668048e2SJed Brown                                         (const CeedScalar **) &opmagma->edata[i]); CeedChk(ierr);
1093668048e2SJed Brown       iein++;
10946a35ea15Sjeremylt     }
10956a35ea15Sjeremylt   }
10966a35ea15Sjeremylt 
109782b77998SStan Tomov   return 0;
109882b77998SStan Tomov }
109982b77998SStan Tomov 
110082b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) {
110182b77998SStan Tomov   CeedOperator_Magma *impl;
110282b77998SStan Tomov   int ierr;
110382b77998SStan Tomov 
110482b77998SStan Tomov   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
110582b77998SStan Tomov   op->data = impl;
110682b77998SStan Tomov   op->Destroy  = CeedOperatorDestroy_Magma;
110782b77998SStan Tomov   op->Apply    = CeedOperatorApply_Magma;
110882b77998SStan Tomov   return 0;
110982b77998SStan Tomov }
111082b77998SStan Tomov 
111182b77998SStan Tomov // *****************************************************************************
111282b77998SStan Tomov // * INIT
111382b77998SStan Tomov // *****************************************************************************
111482b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) {
11151dc2661bSVeselin Dobrev   int ierr;
11162a847359SStan Tomov   if (strcmp(resource, "/gpu/magma"))
111782b77998SStan Tomov     return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource);
111893fbe329SStan Tomov 
11191dc2661bSVeselin Dobrev   ierr = magma_init();
11201dc2661bSVeselin Dobrev   if (ierr) return CeedError(ceed, 1, "error in magma_init(): %d\n", ierr);
112193fbe329SStan Tomov   //magma_print_environment();
112293fbe329SStan Tomov 
112382b77998SStan Tomov   ceed->VecCreate = CeedVectorCreate_Magma;
112482b77998SStan Tomov   ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma;
112582b77998SStan Tomov   ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma;
112682b77998SStan Tomov   ceed->QFunctionCreate = CeedQFunctionCreate_Magma;
112782b77998SStan Tomov   ceed->OperatorCreate = CeedOperatorCreate_Magma;
112882b77998SStan Tomov   return 0;
112982b77998SStan Tomov }
113082b77998SStan Tomov 
113182b77998SStan Tomov // *****************************************************************************
113282b77998SStan Tomov // * REGISTER
113382b77998SStan Tomov // *****************************************************************************
113482b77998SStan Tomov __attribute__((constructor))
113582b77998SStan Tomov static void Register(void) {
113644951fc6Sjeremylt   CeedRegister("/gpu/magma", CeedInit_Magma, 20);
113782b77998SStan Tomov }
1138