xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.c (revision c119dad6925d1c62a7e8353ac8ac74008d2175a8)
182b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
282b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
382b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details.
482b77998SStan Tomov //
582b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software
682b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral
782b77998SStan Tomov // element discretizations for exascale applications. For more information and
882b77998SStan Tomov // source code availability see http://github.com/ceed.
982b77998SStan Tomov //
1082b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1182b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office
1282b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for
1382b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including
1482b77998SStan Tomov // software, applications, hardware, advanced system engineering and early
1582b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative.
1682b77998SStan Tomov 
1782b77998SStan Tomov #include <ceed-impl.h>
1882b77998SStan Tomov #include <string.h>
1993fbe329SStan Tomov #include "magma.h"
2082b77998SStan Tomov 
2182b77998SStan Tomov typedef struct {
2282b77998SStan Tomov     CeedScalar *array;
2393fbe329SStan Tomov     CeedScalar *darray;
24e2900954SStan Tomov     int  own_;
25*c119dad6SStan Tomov     int down_;
2682b77998SStan Tomov } CeedVector_Magma;
2782b77998SStan Tomov 
2882b77998SStan Tomov typedef struct {
291751b0a9SStan Tomov     CeedInt *indices;
301751b0a9SStan Tomov     CeedInt *dindices;
311751b0a9SStan Tomov     int  own_;
32*c119dad6SStan Tomov     int down_;            // cover a case where we own Device memory
3382b77998SStan Tomov } CeedElemRestriction_Magma;
3482b77998SStan Tomov 
3582b77998SStan Tomov typedef struct {
3682b77998SStan Tomov     CeedVector etmp;
3782b77998SStan Tomov     CeedVector qdata;
3882b77998SStan Tomov } CeedOperator_Magma;
3982b77998SStan Tomov 
4093fbe329SStan Tomov // *****************************************************************************
4193fbe329SStan Tomov // * Initialize vector vec (after free mem) with values from array based on cmode
4293fbe329SStan Tomov // *   CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal
4393fbe329SStan Tomov // *                     to array, and data is copied (not store passed pointer)
4493fbe329SStan Tomov // *   CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array
4593fbe329SStan Tomov // *   CEED_USE_POINTER: vec->data->array = array (can modify; no ownership)
4693fbe329SStan Tomov // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE
4793fbe329SStan Tomov // *****************************************************************************
4882b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype,
4982b77998SStan Tomov                                     CeedCopyMode cmode, CeedScalar *array) {
5082b77998SStan Tomov     CeedVector_Magma *impl = vec->data;
5182b77998SStan Tomov     int ierr;
5282b77998SStan Tomov 
53*c119dad6SStan Tomov     // If own data, free the "old" data, e.g., as it may be of different size
54e2900954SStan Tomov     if (impl->own_){
55e2900954SStan Tomov         magma_free( impl->darray );
56e2900954SStan Tomov         magma_free_pinned( impl->array );
57e2900954SStan Tomov         impl->darray = NULL;
58e2900954SStan Tomov         impl->array  = NULL;
59e2900954SStan Tomov         impl->own_ = 0;
60*c119dad6SStan Tomov         impl->down_= 0;
61e2900954SStan Tomov     }
6293fbe329SStan Tomov 
63e2900954SStan Tomov     if (mtype == CEED_MEM_HOST) {
64e2900954SStan Tomov         // memory is on the host; own_ = 0
6582b77998SStan Tomov         switch (cmode) {
6682b77998SStan Tomov         case CEED_COPY_VALUES:
6793fbe329SStan Tomov             ierr = magma_malloc( (void**)&impl->darray,
68e2900954SStan Tomov                                  vec->length * sizeof(CeedScalar)); CeedChk(ierr);
69e2900954SStan Tomov             ierr = magma_malloc_pinned( (void**)&impl->array,
70e2900954SStan Tomov                                         vec->length * sizeof(CeedScalar)); CeedChk(ierr);
71e2900954SStan Tomov             impl->own_ = 1;
72e2900954SStan Tomov 
73*c119dad6SStan Tomov             if (array != NULL)
7493fbe329SStan Tomov                 magma_setvector(vec->length, sizeof(array[0]),
7593fbe329SStan Tomov                                 array, 1, impl->darray, 1);
7682b77998SStan Tomov             break;
7782b77998SStan Tomov         case CEED_OWN_POINTER:
7893fbe329SStan Tomov             ierr = magma_malloc( (void**)&impl->darray,
79e2900954SStan Tomov                                  vec->length * sizeof(CeedScalar)); CeedChk(ierr);
801751b0a9SStan Tomov             // TODO: possible problem here is if we are passed non-pinned memory;
811751b0a9SStan Tomov             //       (as we own it, lter in destroy, we use free for pinned memory).
82e2900954SStan Tomov             impl->array = array;
83e2900954SStan Tomov             impl->own_ = 1;
84e2900954SStan Tomov 
85*c119dad6SStan Tomov             if (array != NULL)
8693fbe329SStan Tomov                 magma_setvector(vec->length, sizeof(array[0]),
8793fbe329SStan Tomov                                 array, 1, impl->darray, 1);
8882b77998SStan Tomov             break;
8982b77998SStan Tomov         case CEED_USE_POINTER:
90*c119dad6SStan Tomov             ierr = magma_malloc( (void**)&impl->darray,
91*c119dad6SStan Tomov                                  vec->length * sizeof(CeedScalar)); CeedChk(ierr);
92*c119dad6SStan Tomov             magma_setvector(vec->length, sizeof(array[0]),
93*c119dad6SStan Tomov                             array, 1, impl->darray, 1);
9482b77998SStan Tomov             impl->array  = array;
9582b77998SStan Tomov         }
96e2900954SStan Tomov     } else if (mtype == CEED_MEM_DEVICE) {
97e2900954SStan Tomov         // memory is on the device; own = 0
98e2900954SStan Tomov         switch (cmode) {
99e2900954SStan Tomov         case CEED_COPY_VALUES:
100e2900954SStan Tomov             ierr = magma_malloc( (void**)&impl->darray,
101e2900954SStan Tomov                                 vec->length * sizeof(CeedScalar)); CeedChk(ierr);
102e2900954SStan Tomov             ierr = magma_malloc_pinned( (void**)&impl->array,
103e2900954SStan Tomov                                         vec->length * sizeof(CeedScalar)); CeedChk(ierr);
104e2900954SStan Tomov             impl->own_ = 1;
105e2900954SStan Tomov 
106e2900954SStan Tomov             if (array)
107e2900954SStan Tomov                 magma_copyvector(vec->length, sizeof(array[0]),
108e2900954SStan Tomov                                  array, 1, impl->darray, 1);
109e2900954SStan Tomov             break;
110e2900954SStan Tomov         case CEED_OWN_POINTER:
111e2900954SStan Tomov             impl->darray = array;
112e2900954SStan Tomov             ierr = magma_malloc_pinned( (void**)&impl->array,
113e2900954SStan Tomov                                         vec->length * sizeof(CeedScalar)); CeedChk(ierr);
114e2900954SStan Tomov             impl->own_ = 1;
115e2900954SStan Tomov 
116e2900954SStan Tomov             break;
117e2900954SStan Tomov         case CEED_USE_POINTER:
118e2900954SStan Tomov             impl->darray = array;
119e2900954SStan Tomov             impl->array  = NULL;
120e2900954SStan Tomov         }
121e2900954SStan Tomov 
122e2900954SStan Tomov     } else
123e2900954SStan Tomov         return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported");
124e2900954SStan Tomov 
12582b77998SStan Tomov     return 0;
12682b77998SStan Tomov }
12782b77998SStan Tomov 
12893fbe329SStan Tomov // *****************************************************************************
129e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE)
13093fbe329SStan Tomov // *****************************************************************************
13182b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype,
13282b77998SStan Tomov                                     CeedScalar **array) {
13382b77998SStan Tomov     CeedVector_Magma *impl = vec->data;
13482b77998SStan Tomov     int ierr;
13582b77998SStan Tomov 
136e2900954SStan Tomov     if (mtype == CEED_MEM_HOST) {
137*c119dad6SStan Tomov         if (impl->own_) {
138e2900954SStan Tomov             // data is owned so GPU had the most up-to-date version; copy it
139e2900954SStan Tomov             magma_getvector(vec->length, sizeof(*array[0]),
140e2900954SStan Tomov                             impl->darray, 1, impl->array, 1);
141*c119dad6SStan Tomov         } else if (impl->array == NULL) {
142*c119dad6SStan Tomov             // Vector doesn't own the data and was set on GPU
143*c119dad6SStan Tomov             if (impl->darray == NULL) {
144*c119dad6SStan Tomov                 // call was made just to allocate memory
145*c119dad6SStan Tomov                 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
146*c119dad6SStan Tomov                 CeedChk(ierr);
147*c119dad6SStan Tomov             } else
148*c119dad6SStan Tomov                 return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST");
14982b77998SStan Tomov         }
15082b77998SStan Tomov         *array = impl->array;
151e2900954SStan Tomov     } else if (mtype == CEED_MEM_DEVICE) {
152*c119dad6SStan Tomov         if (impl->darray == NULL){
153*c119dad6SStan Tomov             // Vector doesn't own the data and was set on the CPU
154*c119dad6SStan Tomov             if (impl->array == NULL) {
155*c119dad6SStan Tomov                 // call was made just to allocate memory
156e2900954SStan Tomov                 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
157e2900954SStan Tomov                 CeedChk(ierr);
158*c119dad6SStan Tomov             } else
159*c119dad6SStan Tomov                 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE");
160e2900954SStan Tomov         }
161e2900954SStan Tomov         *array = impl->darray;
162e2900954SStan Tomov     } else
163e2900954SStan Tomov         return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory");
164e2900954SStan Tomov 
16582b77998SStan Tomov     return 0;
16682b77998SStan Tomov }
16782b77998SStan Tomov 
16893fbe329SStan Tomov // *****************************************************************************
169e2900954SStan Tomov // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it
17093fbe329SStan Tomov // *****************************************************************************
17182b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype,
17282b77998SStan Tomov                                         const CeedScalar **array) {
17382b77998SStan Tomov     CeedVector_Magma *impl = vec->data;
17482b77998SStan Tomov     int ierr;
17582b77998SStan Tomov 
176e2900954SStan Tomov     if (mtype == CEED_MEM_HOST) {
177*c119dad6SStan Tomov         if (impl->own_) {
178e2900954SStan Tomov             // data is owned so GPU had the most up-to-date version; copy it
179e2900954SStan Tomov             magma_getvector(vec->length, sizeof(*array[0]),
180e2900954SStan Tomov                             impl->darray, 1, impl->array, 1);
181*c119dad6SStan Tomov         } else if (impl->array == NULL) {
182*c119dad6SStan Tomov             // Vector doesn't own the data and was set on GPU
183*c119dad6SStan Tomov             if (impl->darray == NULL) {
184*c119dad6SStan Tomov                 // call was made just to allocate memory
185*c119dad6SStan Tomov                 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
186*c119dad6SStan Tomov                 CeedChk(ierr);
187*c119dad6SStan Tomov             } else
188*c119dad6SStan Tomov                 return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST");
18982b77998SStan Tomov         }
19082b77998SStan Tomov         *array = impl->array;
191e2900954SStan Tomov     } else if (mtype == CEED_MEM_DEVICE) {
192*c119dad6SStan Tomov         if (impl->darray == NULL){
193*c119dad6SStan Tomov             // Vector doesn't own the data and was set on the CPU
194*c119dad6SStan Tomov             if (impl->array == NULL) {
195*c119dad6SStan Tomov                 // call was made just to allocate memory
196e2900954SStan Tomov                 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
197e2900954SStan Tomov                 CeedChk(ierr);
198*c119dad6SStan Tomov             } else
199*c119dad6SStan Tomov                 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE");
200e2900954SStan Tomov         }
201e2900954SStan Tomov         *array = impl->darray;
202e2900954SStan Tomov     } else
203e2900954SStan Tomov         return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory");
204e2900954SStan Tomov 
20582b77998SStan Tomov     return 0;
20682b77998SStan Tomov }
20782b77998SStan Tomov 
20893fbe329SStan Tomov // *****************************************************************************
209e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST
210e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if
211e2900954SStan Tomov // * it was GPU memory we would not call this routine at all.
21293fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values
21393fbe329SStan Tomov // * from vec and possibly modified them.
21493fbe329SStan Tomov // *****************************************************************************
21582b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) {
21693fbe329SStan Tomov     CeedVector_Magma *impl = vec->data;
21793fbe329SStan Tomov 
218*c119dad6SStan Tomov     // Check if the array is a CPU pointer
219*c119dad6SStan Tomov     if (*array == impl->array) {
220*c119dad6SStan Tomov         // Update device, if the device pointer is not NULL
221*c119dad6SStan Tomov         if (impl->darray != NULL) {
22293fbe329SStan Tomov             magma_setvector(vec->length, sizeof(*array[0]),
22393fbe329SStan Tomov                             *array, 1, impl->darray, 1);
224*c119dad6SStan Tomov         } else
225*c119dad6SStan Tomov             ; // nothing to do (case of CPU use pointer)
226*c119dad6SStan Tomov 
227*c119dad6SStan Tomov     } else if (impl->down_) {
228*c119dad6SStan Tomov         // nothing to do if array is on GPU, except if down_=1(case CPU use pointer)
229*c119dad6SStan Tomov         magma_getvector(vec->length, sizeof(*array[0]),
230*c119dad6SStan Tomov                         impl->darray, 1, impl->array, 1);
231*c119dad6SStan Tomov     }
232e2900954SStan Tomov 
23382b77998SStan Tomov     *array = NULL;
23482b77998SStan Tomov     return 0;
23582b77998SStan Tomov }
23682b77998SStan Tomov 
23793fbe329SStan Tomov // *****************************************************************************
238e2900954SStan Tomov // * There is no mtype here for array so it is not clear if we restore from HOST
239e2900954SStan Tomov // * memory or from DEVICE memory. We assume that it is CPU memory because if
240e2900954SStan Tomov // * it was GPU memory we would not call this routine at all.
24193fbe329SStan Tomov // * Restore vector vec with values from array, where array received its values
24293fbe329SStan Tomov // * from vec to only read them; in this case vec may have been modified meanwhile
24393fbe329SStan Tomov // * and needs to be restored here.
24493fbe329SStan Tomov // *****************************************************************************
24582b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec,
24682b77998SStan Tomov                                             const CeedScalar **array) {
24793fbe329SStan Tomov     CeedVector_Magma *impl = vec->data;
24893fbe329SStan Tomov 
249*c119dad6SStan Tomov     // Check if the array is a CPU pointer
250*c119dad6SStan Tomov     if (*array == impl->array) {
251*c119dad6SStan Tomov         // Update device, if the device pointer is not NULL
252e2900954SStan Tomov         if (impl->darray != NULL)
25393fbe329SStan Tomov             magma_setvector(vec->length, sizeof(*array[0]),
25493fbe329SStan Tomov                             *array, 1, impl->darray, 1);
255*c119dad6SStan Tomov         else
256*c119dad6SStan Tomov             ; // nothing to do (case of CPU use pointer)
257*c119dad6SStan Tomov 
258*c119dad6SStan Tomov     } else if (impl->down_) {
259*c119dad6SStan Tomov         // nothing to do if array is on GPU, except if down_=1(case CPU use pointer)
260*c119dad6SStan Tomov         magma_getvector(vec->length, sizeof(*array[0]),
261*c119dad6SStan Tomov                         impl->darray, 1, impl->array, 1);
262*c119dad6SStan Tomov     }
263e2900954SStan Tomov 
26482b77998SStan Tomov     *array = NULL;
26582b77998SStan Tomov     return 0;
26682b77998SStan Tomov }
26782b77998SStan Tomov 
26882b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) {
26982b77998SStan Tomov     CeedVector_Magma *impl = vec->data;
27082b77998SStan Tomov     int ierr;
27182b77998SStan Tomov 
272e2900954SStan Tomov     // Free if we own the data
273e2900954SStan Tomov     if (impl->own_){
274e2900954SStan Tomov         ierr = magma_free_pinned(impl->array); CeedChk(ierr);
27593fbe329SStan Tomov         ierr = magma_free(impl->darray);       CeedChk(ierr);
276*c119dad6SStan Tomov     } else if (impl->down_){
277*c119dad6SStan Tomov         ierr = magma_free(impl->darray);       CeedChk(ierr);
278e2900954SStan Tomov     }
27982b77998SStan Tomov     ierr = CeedFree(&vec->data); CeedChk(ierr);
28082b77998SStan Tomov     return 0;
28182b77998SStan Tomov }
28282b77998SStan Tomov 
28393fbe329SStan Tomov // *****************************************************************************
28493fbe329SStan Tomov // * Create vector vec of size n
28593fbe329SStan Tomov // *****************************************************************************
28682b77998SStan Tomov static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) {
28782b77998SStan Tomov     CeedVector_Magma *impl;
28882b77998SStan Tomov     int ierr;
28982b77998SStan Tomov 
29082b77998SStan Tomov     vec->SetArray = CeedVectorSetArray_Magma;
29182b77998SStan Tomov     vec->GetArray = CeedVectorGetArray_Magma;
29282b77998SStan Tomov     vec->GetArrayRead = CeedVectorGetArrayRead_Magma;
29382b77998SStan Tomov     vec->RestoreArray = CeedVectorRestoreArray_Magma;
29482b77998SStan Tomov     vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma;
29582b77998SStan Tomov     vec->Destroy = CeedVectorDestroy_Magma;
29682b77998SStan Tomov     ierr = CeedCalloc(1,&impl); CeedChk(ierr);
29793fbe329SStan Tomov     impl->darray = NULL;
298e2900954SStan Tomov     impl->array  = NULL;
299e2900954SStan Tomov     impl->own_ = 0;
300*c119dad6SStan Tomov     impl->down_= 0;
30182b77998SStan Tomov     vec->data = impl;
30282b77998SStan Tomov     return 0;
30382b77998SStan Tomov }
30482b77998SStan Tomov 
30593fbe329SStan Tomov // *****************************************************************************
30693fbe329SStan Tomov // * Apply restriction operator r to u: v = r(rmode) u
30793fbe329SStan Tomov // *****************************************************************************
30882b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r,
30982b77998SStan Tomov                                           CeedTransposeMode tmode, CeedInt ncomp,
31082b77998SStan Tomov                                           CeedTransposeMode lmode, CeedVector u,
31182b77998SStan Tomov                                           CeedVector v, CeedRequest *request) {
31282b77998SStan Tomov     CeedElemRestriction_Magma *impl = r->data;
31382b77998SStan Tomov     int ierr;
31482b77998SStan Tomov     const CeedScalar *uu;
31582b77998SStan Tomov     CeedScalar *vv;
316*c119dad6SStan Tomov     CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof;
317*c119dad6SStan Tomov     CeedInt esize = nelem * elemsize;
318*c119dad6SStan Tomov     CeedInt *indices = impl->indices, *dindices = impl->dindices;
31982b77998SStan Tomov 
320*c119dad6SStan Tomov     //ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
321*c119dad6SStan Tomov     //ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
322*c119dad6SStan Tomov 
323*c119dad6SStan Tomov     // Get pointers on the device
324*c119dad6SStan Tomov     ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr);
325*c119dad6SStan Tomov     ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr);
326*c119dad6SStan Tomov 
32782b77998SStan Tomov     if (tmode == CEED_NOTRANSPOSE) {
32882b77998SStan Tomov         // Perform: v = r * u
32982b77998SStan Tomov         if (ncomp == 1) {
330*c119dad6SStan Tomov             //for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]];
331e2900954SStan Tomov 
332*c119dad6SStan Tomov             magma_template<<i=0:esize>>
333*c119dad6SStan Tomov                 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices){
334*c119dad6SStan Tomov                 vv[i] = uu[dindices[i]];
335e2900954SStan Tomov             }
33682b77998SStan Tomov         } else {
33782b77998SStan Tomov             // vv is (elemsize x ncomp x nelem), column-major
33882b77998SStan Tomov             if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
339*c119dad6SStan Tomov                 /*
340*c119dad6SStan Tomov                 for (CeedInt e = 0; e < nelem; e++)
34182b77998SStan Tomov                     for (CeedInt d = 0; d < ncomp; d++)
342*c119dad6SStan Tomov                         for (CeedInt i=0; i < elemsize; i++) {
343*c119dad6SStan Tomov                             vv[i + elemsize*(d+ncomp*e)] =
344*c119dad6SStan Tomov                                 uu[indices[i+elemsize*e]+ndof*d];
345*c119dad6SStan Tomov                         }
346*c119dad6SStan Tomov                 */
347*c119dad6SStan Tomov                 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
348*c119dad6SStan Tomov                     (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) {
349*c119dad6SStan Tomov                     vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d];
35082b77998SStan Tomov                 }
35182b77998SStan Tomov             } else { // u is (ncomp x ndof), column-major
352*c119dad6SStan Tomov                 /*
353*c119dad6SStan Tomov                 for (CeedInt e = 0; e < nelem; e++)
35482b77998SStan Tomov                     for (CeedInt d = 0; d < ncomp; d++)
355*c119dad6SStan Tomov                         for (CeedInt i=0; i< elemsize; i++) {
356*c119dad6SStan Tomov                             vv[i + elemsize*(d+ncomp*e)] =
357*c119dad6SStan Tomov                                 uu[d+ncomp*indices[i+elemsize*e]];
358*c119dad6SStan Tomov                         }
359*c119dad6SStan Tomov                 */
360*c119dad6SStan Tomov                 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
361*c119dad6SStan Tomov                     (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
362*c119dad6SStan Tomov                     vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]];
36382b77998SStan Tomov                 }
36482b77998SStan Tomov             }
36582b77998SStan Tomov         }
36682b77998SStan Tomov     } else {
36782b77998SStan Tomov         // Note: in transpose mode, we perform: v += r^t * u
36882b77998SStan Tomov         if (ncomp == 1) {
369*c119dad6SStan Tomov             // fprintf(stderr,"3 ---------\n");
370*c119dad6SStan Tomov             // for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i];
371*c119dad6SStan Tomov             magma_template<<i=0:esize>>
372*c119dad6SStan Tomov                 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
373*c119dad6SStan Tomov                 magmablas_datomic_add( &vv[dindices[i]], uu[i]);
374*c119dad6SStan Tomov             }
375*c119dad6SStan Tomov         } else { // u is (elemsize x ncomp x nelem)
376*c119dad6SStan Tomov             fprintf(stderr,"2 ---------\n");
377*c119dad6SStan Tomov 
37882b77998SStan Tomov             if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
379*c119dad6SStan Tomov                 /*
380*c119dad6SStan Tomov                 for (CeedInt e = 0; e < nelem; e++)
38182b77998SStan Tomov                     for (CeedInt d = 0; d < ncomp; d++)
382*c119dad6SStan Tomov                         for (CeedInt i=0; i < elemsize; i++) {
383*c119dad6SStan Tomov                             vv[indices[i + elemsize*e]+ndof*d] +=
384*c119dad6SStan Tomov                                 uu[i + elemsize*(d+e*ncomp)];
385*c119dad6SStan Tomov                         }
386*c119dad6SStan Tomov                 */
387*c119dad6SStan Tomov                 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
388*c119dad6SStan Tomov                     (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) {
389*c119dad6SStan Tomov                     magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d],
390*c119dad6SStan Tomov                                            uu[i+iend*(d+e*dend)]);
39182b77998SStan Tomov                 }
39282b77998SStan Tomov             } else { // vv is (ncomp x ndof), column-major
393*c119dad6SStan Tomov                 /*
394*c119dad6SStan Tomov                 for (CeedInt e = 0; e < nelem; e++)
39582b77998SStan Tomov                     for (CeedInt d = 0; d < ncomp; d++)
396*c119dad6SStan Tomov                         for (CeedInt i=0; i < elemsize; i++) {
397*c119dad6SStan Tomov                             vv[d+ncomp*indices[i + elemsize*e]] +=
398*c119dad6SStan Tomov                                 uu[i + elemsize*(d+e*ncomp)];
399*c119dad6SStan Tomov                         }
400*c119dad6SStan Tomov                 */
401*c119dad6SStan Tomov                 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>>
402*c119dad6SStan Tomov                     (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) {
403*c119dad6SStan Tomov                     magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]],
404*c119dad6SStan Tomov                                            uu[i+iend*(d+e*dend)]);
40582b77998SStan Tomov                 }
40682b77998SStan Tomov             }
40782b77998SStan Tomov         }
40882b77998SStan Tomov     }
40982b77998SStan Tomov     ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
41082b77998SStan Tomov     ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
411*c119dad6SStan Tomov 
41282b77998SStan Tomov     if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
41382b77998SStan Tomov         *request = NULL;
41482b77998SStan Tomov     return 0;
41582b77998SStan Tomov }
41682b77998SStan Tomov 
41782b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) {
41882b77998SStan Tomov   CeedElemRestriction_Magma *impl = r->data;
41982b77998SStan Tomov   int ierr;
42082b77998SStan Tomov 
4211751b0a9SStan Tomov   // Free if we own the data
4221751b0a9SStan Tomov   if (impl->own_){
4231751b0a9SStan Tomov       ierr = magma_free_pinned(impl->indices); CeedChk(ierr);
4241751b0a9SStan Tomov       ierr = magma_free(impl->dindices);       CeedChk(ierr);
4251751b0a9SStan Tomov   }
426*c119dad6SStan Tomov   else if (impl->down_){
427*c119dad6SStan Tomov       ierr = magma_free(impl->dindices);       CeedChk(ierr);
428*c119dad6SStan Tomov   }
42982b77998SStan Tomov   ierr = CeedFree(&r->data); CeedChk(ierr);
43082b77998SStan Tomov   return 0;
43182b77998SStan Tomov }
43282b77998SStan Tomov 
43382b77998SStan Tomov static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r,
43482b77998SStan Tomov                                            CeedMemType mtype,
43593fbe329SStan Tomov                                            CeedCopyMode cmode,
43693fbe329SStan Tomov                                            const CeedInt *indices) {
4371751b0a9SStan Tomov     int ierr, size = r->nelem*r->elemsize;
43882b77998SStan Tomov     CeedElemRestriction_Magma *impl;
43982b77998SStan Tomov 
4401751b0a9SStan Tomov     // Allocate memory for the MAGMA Restricton and initializa pointers to NULL
44182b77998SStan Tomov     ierr = CeedCalloc(1,&impl); CeedChk(ierr);
4421751b0a9SStan Tomov     impl->dindices = NULL;
4431751b0a9SStan Tomov     impl->indices  = NULL;
4441751b0a9SStan Tomov     impl->own_ = 0;
445*c119dad6SStan Tomov     impl->down_= 0;
4461751b0a9SStan Tomov 
4471751b0a9SStan Tomov     if (mtype == CEED_MEM_HOST) {
4481751b0a9SStan Tomov         // memory is on the host; own_ = 0
44982b77998SStan Tomov         switch (cmode) {
45082b77998SStan Tomov         case CEED_COPY_VALUES:
4511751b0a9SStan Tomov             ierr = magma_malloc( (void**)&impl->dindices,
4521751b0a9SStan Tomov                                   size * sizeof(CeedInt)); CeedChk(ierr);
4531751b0a9SStan Tomov             ierr = magma_malloc_pinned( (void**)&impl->indices,
4541751b0a9SStan Tomov                                         size * sizeof(CeedInt)); CeedChk(ierr);
4551751b0a9SStan Tomov             impl->own_ = 1;
4561751b0a9SStan Tomov 
457*c119dad6SStan Tomov             if (indices != NULL)
4581751b0a9SStan Tomov                 magma_setvector(size, sizeof(CeedInt),
4591751b0a9SStan Tomov                                 indices, 1, impl->dindices, 1);
46082b77998SStan Tomov             break;
46182b77998SStan Tomov         case CEED_OWN_POINTER:
4621751b0a9SStan Tomov             ierr = magma_malloc( (void**)&impl->dindices,
4631751b0a9SStan Tomov                                  size * sizeof(CeedInt)); CeedChk(ierr);
4641751b0a9SStan Tomov             // TODO: possible problem here is if we are passed non-pinned memory;
4651751b0a9SStan Tomov             //       (as we own it, lter in destroy, we use free for pinned memory).
4661751b0a9SStan Tomov             impl->indices = indices;
4671751b0a9SStan Tomov             impl->own_ = 1;
4681751b0a9SStan Tomov 
469*c119dad6SStan Tomov             if (indices != NULL)
4701751b0a9SStan Tomov                 magma_setvector(size, sizeof(CeedInt),
4711751b0a9SStan Tomov                                 indices, 1, impl->dindices, 1);
47282b77998SStan Tomov             break;
47382b77998SStan Tomov         case CEED_USE_POINTER:
474*c119dad6SStan Tomov             ierr = magma_malloc( (void**)&impl->dindices,
475*c119dad6SStan Tomov                                  size * sizeof(CeedInt)); CeedChk(ierr);
476*c119dad6SStan Tomov             magma_setvector(size, sizeof(CeedInt),
477*c119dad6SStan Tomov                             indices, 1, impl->dindices, 1);
478*c119dad6SStan Tomov             impl->down_ = 1;
47982b77998SStan Tomov             impl->indices  = indices;
48082b77998SStan Tomov         }
4811751b0a9SStan Tomov     } else if (mtype == CEED_MEM_DEVICE) {
4821751b0a9SStan Tomov         // memory is on the device; own = 0
4831751b0a9SStan Tomov         switch (cmode) {
4841751b0a9SStan Tomov         case CEED_COPY_VALUES:
4851751b0a9SStan Tomov             ierr = magma_malloc( (void**)&impl->dindices,
4861751b0a9SStan Tomov                                  size * sizeof(CeedInt)); CeedChk(ierr);
4871751b0a9SStan Tomov             ierr = magma_malloc_pinned( (void**)&impl->indices,
4881751b0a9SStan Tomov                                         size * sizeof(CeedInt)); CeedChk(ierr);
4891751b0a9SStan Tomov             impl->own_ = 1;
4901751b0a9SStan Tomov 
4911751b0a9SStan Tomov             if (indices)
4921751b0a9SStan Tomov                 magma_copyvector(size, sizeof(CeedInt),
4931751b0a9SStan Tomov                                  indices, 1, impl->dindices, 1);
4941751b0a9SStan Tomov             break;
4951751b0a9SStan Tomov         case CEED_OWN_POINTER:
4961751b0a9SStan Tomov             impl->dindices = indices;
4971751b0a9SStan Tomov             ierr = magma_malloc_pinned( (void**)&impl->indices,
4981751b0a9SStan Tomov                                         size * sizeof(CeedInt)); CeedChk(ierr);
4991751b0a9SStan Tomov             impl->own_ = 1;
5001751b0a9SStan Tomov 
5011751b0a9SStan Tomov             break;
5021751b0a9SStan Tomov         case CEED_USE_POINTER:
5031751b0a9SStan Tomov             impl->dindices = indices;
5041751b0a9SStan Tomov             impl->indices  = NULL;
5051751b0a9SStan Tomov         }
5061751b0a9SStan Tomov 
5071751b0a9SStan Tomov     } else
5081751b0a9SStan Tomov         return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported");
5091751b0a9SStan Tomov 
51082b77998SStan Tomov     r->data    = impl;
51182b77998SStan Tomov     r->Apply   = CeedElemRestrictionApply_Magma;
51282b77998SStan Tomov     r->Destroy = CeedElemRestrictionDestroy_Magma;
5131751b0a9SStan Tomov 
51482b77998SStan Tomov     return 0;
51582b77998SStan Tomov }
51682b77998SStan Tomov 
51782b77998SStan Tomov // Contracts on the middle index
51882b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc
51982b77998SStan Tomov // TRANSPOSE:   V_ajc = T_bj U_abc
52082b77998SStan Tomov // If Add != 0, "=" is replaced by "+="
52182b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed,
52282b77998SStan Tomov                                     CeedInt A, CeedInt B, CeedInt C, CeedInt J,
52382b77998SStan Tomov                                     const CeedScalar *t, CeedTransposeMode tmode,
52482b77998SStan Tomov                                     const CeedInt Add,
52582b77998SStan Tomov                                     const CeedScalar *u, CeedScalar *v) {
52682b77998SStan Tomov     CeedInt tstride0 = B, tstride1 = 1;
52782b77998SStan Tomov     if (tmode == CEED_TRANSPOSE) {
52882b77998SStan Tomov         tstride0 = 1; tstride1 = J;
52982b77998SStan Tomov     }
53082b77998SStan Tomov 
53182b77998SStan Tomov     for (CeedInt a=0; a<A; a++) {
53282b77998SStan Tomov         for (CeedInt j=0; j<J; j++) {
53382b77998SStan Tomov             if (!Add) {
53482b77998SStan Tomov                 for (CeedInt c=0; c<C; c++)
53582b77998SStan Tomov                     v[(a*J+j)*C+c] = 0;
53682b77998SStan Tomov             }
53782b77998SStan Tomov             for (CeedInt b=0; b<B; b++) {
53882b77998SStan Tomov                 for (CeedInt c=0; c<C; c++) {
53982b77998SStan Tomov                     v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c];
54082b77998SStan Tomov                 }
54182b77998SStan Tomov             }
54282b77998SStan Tomov         }
54382b77998SStan Tomov     }
54482b77998SStan Tomov     return 0;
54582b77998SStan Tomov }
54682b77998SStan Tomov 
54782b77998SStan Tomov static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode,
54882b77998SStan Tomov                                 CeedEvalMode emode,
54982b77998SStan Tomov                                 const CeedScalar *u, CeedScalar *v) {
55082b77998SStan Tomov     int ierr;
55182b77998SStan Tomov     const CeedInt dim = basis->dim;
55282b77998SStan Tomov     const CeedInt ndof = basis->ndof;
55382b77998SStan Tomov     const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim);
55482b77998SStan Tomov     const CeedInt add = (tmode == CEED_TRANSPOSE);
55582b77998SStan Tomov 
55682b77998SStan Tomov     if (tmode == CEED_TRANSPOSE) {
55782b77998SStan Tomov         const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim);
55882b77998SStan Tomov         for (CeedInt i = 0; i < vsize; i++)
55982b77998SStan Tomov             v[i] = (CeedScalar) 0;
56082b77998SStan Tomov     }
56182b77998SStan Tomov     if (emode & CEED_EVAL_INTERP) {
56282b77998SStan Tomov         CeedInt P = basis->P1d, Q = basis->Q1d;
56382b77998SStan Tomov         if (tmode == CEED_TRANSPOSE) {
56482b77998SStan Tomov             P = basis->Q1d; Q = basis->P1d;
56582b77998SStan Tomov         }
56682b77998SStan Tomov         CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
56782b77998SStan Tomov         CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
56882b77998SStan Tomov         for (CeedInt d=0; d<dim; d++) {
56982b77998SStan Tomov             ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d,
57082b77998SStan Tomov                                             tmode, add&&(d==dim-1),
57182b77998SStan Tomov                                             d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
57282b77998SStan Tomov             CeedChk(ierr);
57382b77998SStan Tomov             pre /= P;
57482b77998SStan Tomov             post *= Q;
57582b77998SStan Tomov         }
57682b77998SStan Tomov         if (tmode == CEED_NOTRANSPOSE) {
57782b77998SStan Tomov             v += nqpt;
57882b77998SStan Tomov         } else {
57982b77998SStan Tomov             u += nqpt;
58082b77998SStan Tomov         }
58182b77998SStan Tomov     }
58282b77998SStan Tomov     if (emode & CEED_EVAL_GRAD) {
58382b77998SStan Tomov         CeedInt P = basis->P1d, Q = basis->Q1d;
58482b77998SStan Tomov         // In CEED_NOTRANSPOSE mode:
58582b77998SStan Tomov         // u is (P^dim x nc), column-major layout (nc = ndof)
58682b77998SStan Tomov         // v is (Q^dim x nc x dim), column-major layout (nc = ndof)
58782b77998SStan Tomov         // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
58882b77998SStan Tomov         if (tmode == CEED_TRANSPOSE) {
58982b77998SStan Tomov             P = basis->Q1d, Q = basis->P1d;
59082b77998SStan Tomov         }
59182b77998SStan Tomov         CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
59282b77998SStan Tomov         for (CeedInt p = 0; p < dim; p++) {
59382b77998SStan Tomov             CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
59482b77998SStan Tomov             for (CeedInt d=0; d<dim; d++) {
59582b77998SStan Tomov                 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q,
59682b77998SStan Tomov                                                 (p==d)?basis->grad1d:basis->interp1d,
59782b77998SStan Tomov                                                 tmode, add&&(d==dim-1),
59882b77998SStan Tomov                                                 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
59982b77998SStan Tomov                 CeedChk(ierr);
60082b77998SStan Tomov                 pre /= P;
60182b77998SStan Tomov                 post *= Q;
60282b77998SStan Tomov             }
60382b77998SStan Tomov             if (tmode == CEED_NOTRANSPOSE) {
60482b77998SStan Tomov                 v += nqpt;
60582b77998SStan Tomov             } else {
60682b77998SStan Tomov                 u += nqpt;
60782b77998SStan Tomov             }
60882b77998SStan Tomov         }
60982b77998SStan Tomov     }
61082b77998SStan Tomov     if (emode & CEED_EVAL_WEIGHT) {
61182b77998SStan Tomov         if (tmode == CEED_TRANSPOSE)
61282b77998SStan Tomov             return CeedError(basis->ceed, 1,
61382b77998SStan Tomov                              "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
61482b77998SStan Tomov         CeedInt Q = basis->Q1d;
61582b77998SStan Tomov         for (CeedInt d=0; d<dim; d++) {
61682b77998SStan Tomov             CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d);
61782b77998SStan Tomov             for (CeedInt i=0; i<pre; i++) {
61882b77998SStan Tomov                 for (CeedInt j=0; j<Q; j++) {
61982b77998SStan Tomov                     for (CeedInt k=0; k<post; k++) {
62082b77998SStan Tomov                         v[(i*Q + j)*post + k] = basis->qweight1d[j]
62182b77998SStan Tomov                             * (d == 0 ? 1 : v[(i*Q + j)*post + k]);
62282b77998SStan Tomov                     }
62382b77998SStan Tomov                 }
62482b77998SStan Tomov             }
62582b77998SStan Tomov         }
62682b77998SStan Tomov     }
62782b77998SStan Tomov     return 0;
62882b77998SStan Tomov }
62982b77998SStan Tomov 
63082b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) {
63182b77998SStan Tomov     return 0;
63282b77998SStan Tomov }
63382b77998SStan Tomov 
63482b77998SStan Tomov static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d,
63582b77998SStan Tomov                                          CeedInt Q1d, const CeedScalar *interp1d,
63682b77998SStan Tomov                                          const CeedScalar *grad1d,
63782b77998SStan Tomov                                          const CeedScalar *qref1d,
63882b77998SStan Tomov                                          const CeedScalar *qweight1d,
63982b77998SStan Tomov                                          CeedBasis basis) {
64082b77998SStan Tomov     basis->Apply = CeedBasisApply_Magma;
64182b77998SStan Tomov     basis->Destroy = CeedBasisDestroy_Magma;
64282b77998SStan Tomov     return 0;
64382b77998SStan Tomov }
64482b77998SStan Tomov 
64582b77998SStan Tomov static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q,
64682b77998SStan Tomov                                     const CeedScalar *const *u,
64782b77998SStan Tomov                                     CeedScalar *const *v) {
64882b77998SStan Tomov     int ierr;
64982b77998SStan Tomov     ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr);
65082b77998SStan Tomov     return 0;
65182b77998SStan Tomov }
65282b77998SStan Tomov 
65382b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) {
65482b77998SStan Tomov     return 0;
65582b77998SStan Tomov }
65682b77998SStan Tomov 
65782b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) {
65882b77998SStan Tomov     qf->Apply = CeedQFunctionApply_Magma;
65982b77998SStan Tomov     qf->Destroy = CeedQFunctionDestroy_Magma;
66082b77998SStan Tomov     return 0;
66182b77998SStan Tomov }
66282b77998SStan Tomov 
66382b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) {
66482b77998SStan Tomov     CeedOperator_Magma *impl = op->data;
66582b77998SStan Tomov     int ierr;
66682b77998SStan Tomov 
66782b77998SStan Tomov     ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr);
66882b77998SStan Tomov     ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr);
66982b77998SStan Tomov     ierr = CeedFree(&op->data); CeedChk(ierr);
67082b77998SStan Tomov     return 0;
67182b77998SStan Tomov }
67282b77998SStan Tomov 
67382b77998SStan Tomov static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata,
67482b77998SStan Tomov                                    CeedVector ustate,
67582b77998SStan Tomov                                    CeedVector residual, CeedRequest *request) {
67682b77998SStan Tomov     CeedOperator_Magma *impl = op->data;
67782b77998SStan Tomov     CeedVector etmp;
67882b77998SStan Tomov     CeedInt Q;
67982b77998SStan Tomov     const CeedInt nc = op->basis->ndof, dim = op->basis->dim;
68082b77998SStan Tomov     CeedScalar *Eu;
68182b77998SStan Tomov     char *qd;
68282b77998SStan Tomov     int ierr;
68382b77998SStan Tomov     CeedTransposeMode lmode = CEED_NOTRANSPOSE;
68482b77998SStan Tomov 
68582b77998SStan Tomov     if (!impl->etmp) {
68682b77998SStan Tomov         ierr = CeedVectorCreate(op->ceed,
68782b77998SStan Tomov                                 nc * op->Erestrict->nelem * op->Erestrict->elemsize,
68882b77998SStan Tomov                                 &impl->etmp); CeedChk(ierr);
68982b77998SStan Tomov         // etmp is allocated when CeedVectorGetArray is called below
69082b77998SStan Tomov     }
69182b77998SStan Tomov     etmp = impl->etmp;
69282b77998SStan Tomov     if (op->qf->inmode & ~CEED_EVAL_WEIGHT) {
69382b77998SStan Tomov         ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE,
69482b77998SStan Tomov                                         nc, lmode, ustate, etmp,
69582b77998SStan Tomov                                         CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
69682b77998SStan Tomov     }
69782b77998SStan Tomov     ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
69882b77998SStan Tomov     ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr);
69982b77998SStan Tomov     ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd);
70082b77998SStan Tomov     CeedChk(ierr);
70182b77998SStan Tomov     for (CeedInt e=0; e<op->Erestrict->nelem; e++) {
70282b77998SStan Tomov         CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0};
70382b77998SStan Tomov         const CeedScalar *in[5] = {0,0,0,0,0};
70482b77998SStan Tomov         // TODO: quadrature weights can be computed just once
70582b77998SStan Tomov         ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode,
70682b77998SStan Tomov                               &Eu[e*op->Erestrict->elemsize*nc], BEu);
70782b77998SStan Tomov         CeedChk(ierr);
70882b77998SStan Tomov         CeedScalar *u_ptr = BEu, *v_ptr = BEv;
70982b77998SStan Tomov         if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; }
71082b77998SStan Tomov         if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; }
71182b77998SStan Tomov         if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; }
71282b77998SStan Tomov         if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; }
71382b77998SStan Tomov         if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; }
71482b77998SStan Tomov         ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out);
71582b77998SStan Tomov         CeedChk(ierr);
71682b77998SStan Tomov         ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv,
71782b77998SStan Tomov                               &Eu[e*op->Erestrict->elemsize*nc]);
71882b77998SStan Tomov         CeedChk(ierr);
71982b77998SStan Tomov     }
72082b77998SStan Tomov     ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr);
72182b77998SStan Tomov     if (residual) {
72282b77998SStan Tomov         CeedScalar *res;
72382b77998SStan Tomov         CeedVectorGetArray(residual, CEED_MEM_HOST, &res);
72482b77998SStan Tomov         for (int i = 0; i < residual->length; i++)
72582b77998SStan Tomov             res[i] = (CeedScalar)0;
72682b77998SStan Tomov         ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE,
72782b77998SStan Tomov                                         nc, lmode, etmp, residual,
72882b77998SStan Tomov                                         CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
72982b77998SStan Tomov     }
73082b77998SStan Tomov     if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
73182b77998SStan Tomov         *request = NULL;
73282b77998SStan Tomov     return 0;
73382b77998SStan Tomov }
73482b77998SStan Tomov 
73582b77998SStan Tomov static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) {
73682b77998SStan Tomov     CeedOperator_Magma *impl = op->data;
73782b77998SStan Tomov     int ierr;
73882b77998SStan Tomov 
73982b77998SStan Tomov     if (!impl->qdata) {
74082b77998SStan Tomov         CeedInt Q;
74182b77998SStan Tomov         ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
74282b77998SStan Tomov         ierr = CeedVectorCreate(op->ceed,
74382b77998SStan Tomov                                 op->Erestrict->nelem * Q
74482b77998SStan Tomov                                 * op->qf->qdatasize / sizeof(CeedScalar),
74582b77998SStan Tomov                                 &impl->qdata); CeedChk(ierr);
74682b77998SStan Tomov     }
74782b77998SStan Tomov     *qdata = impl->qdata;
74882b77998SStan Tomov     return 0;
74982b77998SStan Tomov }
75082b77998SStan Tomov 
75182b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) {
75282b77998SStan Tomov     CeedOperator_Magma *impl;
75382b77998SStan Tomov     int ierr;
75482b77998SStan Tomov 
75582b77998SStan Tomov     ierr = CeedCalloc(1, &impl); CeedChk(ierr);
75682b77998SStan Tomov     op->data = impl;
75782b77998SStan Tomov     op->Destroy = CeedOperatorDestroy_Magma;
75882b77998SStan Tomov     op->Apply = CeedOperatorApply_Magma;
75982b77998SStan Tomov     op->GetQData = CeedOperatorGetQData_Magma;
76082b77998SStan Tomov     return 0;
76182b77998SStan Tomov }
76282b77998SStan Tomov 
76382b77998SStan Tomov // *****************************************************************************
76482b77998SStan Tomov // * INIT
76582b77998SStan Tomov // *****************************************************************************
76682b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) {
76782b77998SStan Tomov     if (strcmp(resource, "/cpu/magma")
76882b77998SStan Tomov         && strcmp(resource, "/gpu/magma"))
76982b77998SStan Tomov         return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource);
77093fbe329SStan Tomov 
77193fbe329SStan Tomov     magma_init();
77293fbe329SStan Tomov     //magma_print_environment();
77393fbe329SStan Tomov 
77482b77998SStan Tomov     ceed->VecCreate = CeedVectorCreate_Magma;
77582b77998SStan Tomov     ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma;
77682b77998SStan Tomov     ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma;
77782b77998SStan Tomov     ceed->QFunctionCreate = CeedQFunctionCreate_Magma;
77882b77998SStan Tomov     ceed->OperatorCreate = CeedOperatorCreate_Magma;
77982b77998SStan Tomov     return 0;
78082b77998SStan Tomov }
78182b77998SStan Tomov 
78282b77998SStan Tomov // *****************************************************************************
78382b77998SStan Tomov // * REGISTER
78482b77998SStan Tomov // *****************************************************************************
78582b77998SStan Tomov __attribute__((constructor))
78682b77998SStan Tomov static void Register(void) {
78782b77998SStan Tomov     CeedRegister("/cpu/magma", CeedInit_Magma);
78882b77998SStan Tomov     CeedRegister("/gpu/magma", CeedInit_Magma);
78982b77998SStan Tomov }
790