xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.c (revision 82b77998afc364df0fe1cb8fc42a2cd8b5c94acf)
1*82b77998SStan Tomov // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2*82b77998SStan Tomov // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3*82b77998SStan Tomov // reserved. See files LICENSE and NOTICE for details.
4*82b77998SStan Tomov //
5*82b77998SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software
6*82b77998SStan Tomov // libraries and APIs for efficient high-order finite element and spectral
7*82b77998SStan Tomov // element discretizations for exascale applications. For more information and
8*82b77998SStan Tomov // source code availability see http://github.com/ceed.
9*82b77998SStan Tomov //
10*82b77998SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11*82b77998SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office
12*82b77998SStan Tomov // of Science and the National Nuclear Security Administration) responsible for
13*82b77998SStan Tomov // the planning and preparation of a capable exascale ecosystem, including
14*82b77998SStan Tomov // software, applications, hardware, advanced system engineering and early
15*82b77998SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative.
16*82b77998SStan Tomov 
17*82b77998SStan Tomov #include <ceed-impl.h>
18*82b77998SStan Tomov #include <string.h>
19*82b77998SStan Tomov 
20*82b77998SStan Tomov typedef struct {
21*82b77998SStan Tomov   CeedScalar *array;
22*82b77998SStan Tomov   CeedScalar *array_allocated;
23*82b77998SStan Tomov } CeedVector_Magma;
24*82b77998SStan Tomov 
25*82b77998SStan Tomov typedef struct {
26*82b77998SStan Tomov   const CeedInt *indices;
27*82b77998SStan Tomov   CeedInt *indices_allocated;
28*82b77998SStan Tomov } CeedElemRestriction_Magma;
29*82b77998SStan Tomov 
30*82b77998SStan Tomov typedef struct {
31*82b77998SStan Tomov   CeedVector etmp;
32*82b77998SStan Tomov   CeedVector qdata;
33*82b77998SStan Tomov } CeedOperator_Magma;
34*82b77998SStan Tomov 
35*82b77998SStan Tomov static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype,
36*82b77998SStan Tomov                                   CeedCopyMode cmode, CeedScalar *array) {
37*82b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
38*82b77998SStan Tomov   int ierr;
39*82b77998SStan Tomov 
40*82b77998SStan Tomov   if (mtype != CEED_MEM_HOST)
41*82b77998SStan Tomov     return CeedError(vec->ceed, 1, "Only MemType = HOST supported");
42*82b77998SStan Tomov   ierr = CeedFree(&impl->array_allocated); CeedChk(ierr);
43*82b77998SStan Tomov   switch (cmode) {
44*82b77998SStan Tomov   case CEED_COPY_VALUES:
45*82b77998SStan Tomov     ierr = CeedMalloc(vec->length, &impl->array_allocated); CeedChk(ierr);
46*82b77998SStan Tomov     impl->array = impl->array_allocated;
47*82b77998SStan Tomov     if (array) memcpy(impl->array, array, vec->length * sizeof(array[0]));
48*82b77998SStan Tomov     break;
49*82b77998SStan Tomov   case CEED_OWN_POINTER:
50*82b77998SStan Tomov     impl->array_allocated = array;
51*82b77998SStan Tomov     impl->array = array;
52*82b77998SStan Tomov     break;
53*82b77998SStan Tomov   case CEED_USE_POINTER:
54*82b77998SStan Tomov     impl->array = array;
55*82b77998SStan Tomov   }
56*82b77998SStan Tomov   return 0;
57*82b77998SStan Tomov }
58*82b77998SStan Tomov 
59*82b77998SStan Tomov static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype,
60*82b77998SStan Tomov                                   CeedScalar **array) {
61*82b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
62*82b77998SStan Tomov   int ierr;
63*82b77998SStan Tomov 
64*82b77998SStan Tomov   if (mtype != CEED_MEM_HOST)
65*82b77998SStan Tomov     return CeedError(vec->ceed, 1, "Can only provide to HOST memory");
66*82b77998SStan Tomov   if (!impl->array) { // Allocate if array is not yet allocated
67*82b77998SStan Tomov     ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL);
68*82b77998SStan Tomov     CeedChk(ierr);
69*82b77998SStan Tomov   }
70*82b77998SStan Tomov   *array = impl->array;
71*82b77998SStan Tomov   return 0;
72*82b77998SStan Tomov }
73*82b77998SStan Tomov 
74*82b77998SStan Tomov static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype,
75*82b77998SStan Tomov                                       const CeedScalar **array) {
76*82b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
77*82b77998SStan Tomov   int ierr;
78*82b77998SStan Tomov 
79*82b77998SStan Tomov   if (mtype != CEED_MEM_HOST)
80*82b77998SStan Tomov     return CeedError(vec->ceed, 1, "Can only provide to HOST memory");
81*82b77998SStan Tomov   if (!impl->array) { // Allocate if array is not yet allocated
82*82b77998SStan Tomov     ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL);
83*82b77998SStan Tomov     CeedChk(ierr);
84*82b77998SStan Tomov   }
85*82b77998SStan Tomov   *array = impl->array;
86*82b77998SStan Tomov   return 0;
87*82b77998SStan Tomov }
88*82b77998SStan Tomov 
89*82b77998SStan Tomov static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) {
90*82b77998SStan Tomov   *array = NULL;
91*82b77998SStan Tomov   return 0;
92*82b77998SStan Tomov }
93*82b77998SStan Tomov 
94*82b77998SStan Tomov static int CeedVectorRestoreArrayRead_Magma(CeedVector vec,
95*82b77998SStan Tomov     const CeedScalar **array) {
96*82b77998SStan Tomov   *array = NULL;
97*82b77998SStan Tomov   return 0;
98*82b77998SStan Tomov }
99*82b77998SStan Tomov 
100*82b77998SStan Tomov static int CeedVectorDestroy_Magma(CeedVector vec) {
101*82b77998SStan Tomov   CeedVector_Magma *impl = vec->data;
102*82b77998SStan Tomov   int ierr;
103*82b77998SStan Tomov 
104*82b77998SStan Tomov   ierr = CeedFree(&impl->array_allocated); CeedChk(ierr);
105*82b77998SStan Tomov   ierr = CeedFree(&vec->data); CeedChk(ierr);
106*82b77998SStan Tomov   return 0;
107*82b77998SStan Tomov }
108*82b77998SStan Tomov 
109*82b77998SStan Tomov static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) {
110*82b77998SStan Tomov   CeedVector_Magma *impl;
111*82b77998SStan Tomov   int ierr;
112*82b77998SStan Tomov 
113*82b77998SStan Tomov   vec->SetArray = CeedVectorSetArray_Magma;
114*82b77998SStan Tomov   vec->GetArray = CeedVectorGetArray_Magma;
115*82b77998SStan Tomov   vec->GetArrayRead = CeedVectorGetArrayRead_Magma;
116*82b77998SStan Tomov   vec->RestoreArray = CeedVectorRestoreArray_Magma;
117*82b77998SStan Tomov   vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma;
118*82b77998SStan Tomov   vec->Destroy = CeedVectorDestroy_Magma;
119*82b77998SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
120*82b77998SStan Tomov   vec->data = impl;
121*82b77998SStan Tomov   return 0;
122*82b77998SStan Tomov }
123*82b77998SStan Tomov 
124*82b77998SStan Tomov static int CeedElemRestrictionApply_Magma(CeedElemRestriction r,
125*82b77998SStan Tomov                                         CeedTransposeMode tmode, CeedInt ncomp,
126*82b77998SStan Tomov                                         CeedTransposeMode lmode, CeedVector u,
127*82b77998SStan Tomov                                         CeedVector v, CeedRequest *request) {
128*82b77998SStan Tomov   CeedElemRestriction_Magma *impl = r->data;
129*82b77998SStan Tomov   int ierr;
130*82b77998SStan Tomov   const CeedScalar *uu;
131*82b77998SStan Tomov   CeedScalar *vv;
132*82b77998SStan Tomov   CeedInt esize = r->nelem*r->elemsize;
133*82b77998SStan Tomov 
134*82b77998SStan Tomov   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
135*82b77998SStan Tomov   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
136*82b77998SStan Tomov   if (tmode == CEED_NOTRANSPOSE) {
137*82b77998SStan Tomov     // Perform: v = r * u
138*82b77998SStan Tomov     if (ncomp == 1) {
139*82b77998SStan Tomov       for (CeedInt i=0; i<esize; i++) vv[i] = uu[impl->indices[i]];
140*82b77998SStan Tomov     } else {
141*82b77998SStan Tomov       // vv is (elemsize x ncomp x nelem), column-major
142*82b77998SStan Tomov       if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
143*82b77998SStan Tomov         for (CeedInt e = 0; e < r->nelem; e++)
144*82b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
145*82b77998SStan Tomov             for (CeedInt i=0; i<r->elemsize; i++) {
146*82b77998SStan Tomov               vv[i+r->elemsize*(d+ncomp*e)] =
147*82b77998SStan Tomov                 uu[impl->indices[i+r->elemsize*e]+r->ndof*d];
148*82b77998SStan Tomov             }
149*82b77998SStan Tomov       } else { // u is (ncomp x ndof), column-major
150*82b77998SStan Tomov         for (CeedInt e = 0; e < r->nelem; e++)
151*82b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
152*82b77998SStan Tomov             for (CeedInt i=0; i<r->elemsize; i++) {
153*82b77998SStan Tomov               vv[i+r->elemsize*(d+ncomp*e)] =
154*82b77998SStan Tomov                 uu[d+ncomp*impl->indices[i+r->elemsize*e]];
155*82b77998SStan Tomov             }
156*82b77998SStan Tomov       }
157*82b77998SStan Tomov     }
158*82b77998SStan Tomov   } else {
159*82b77998SStan Tomov     // Note: in transpose mode, we perform: v += r^t * u
160*82b77998SStan Tomov     if (ncomp == 1) {
161*82b77998SStan Tomov       for (CeedInt i=0; i<esize; i++) vv[impl->indices[i]] += uu[i];
162*82b77998SStan Tomov     } else {
163*82b77998SStan Tomov       // u is (elemsize x ncomp x nelem)
164*82b77998SStan Tomov       if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
165*82b77998SStan Tomov         for (CeedInt e = 0; e < r->nelem; e++)
166*82b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
167*82b77998SStan Tomov             for (CeedInt i=0; i<r->elemsize; i++) {
168*82b77998SStan Tomov               vv[impl->indices[i+r->elemsize*e]+r->ndof*d] +=
169*82b77998SStan Tomov                 uu[i+r->elemsize*(d+e*ncomp)];
170*82b77998SStan Tomov             }
171*82b77998SStan Tomov       } else { // vv is (ncomp x ndof), column-major
172*82b77998SStan Tomov         for (CeedInt e = 0; e < r->nelem; e++)
173*82b77998SStan Tomov           for (CeedInt d = 0; d < ncomp; d++)
174*82b77998SStan Tomov             for (CeedInt i=0; i<r->elemsize; i++) {
175*82b77998SStan Tomov               vv[d+ncomp*impl->indices[i+r->elemsize*e]] +=
176*82b77998SStan Tomov                 uu[i+r->elemsize*(d+e*ncomp)];
177*82b77998SStan Tomov             }
178*82b77998SStan Tomov       }
179*82b77998SStan Tomov     }
180*82b77998SStan Tomov   }
181*82b77998SStan Tomov   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
182*82b77998SStan Tomov   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
183*82b77998SStan Tomov   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
184*82b77998SStan Tomov     *request = NULL;
185*82b77998SStan Tomov   return 0;
186*82b77998SStan Tomov }
187*82b77998SStan Tomov 
188*82b77998SStan Tomov static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) {
189*82b77998SStan Tomov   CeedElemRestriction_Magma *impl = r->data;
190*82b77998SStan Tomov   int ierr;
191*82b77998SStan Tomov 
192*82b77998SStan Tomov   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
193*82b77998SStan Tomov   ierr = CeedFree(&r->data); CeedChk(ierr);
194*82b77998SStan Tomov   return 0;
195*82b77998SStan Tomov }
196*82b77998SStan Tomov 
197*82b77998SStan Tomov static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r,
198*82b77998SStan Tomov     CeedMemType mtype,
199*82b77998SStan Tomov     CeedCopyMode cmode, const CeedInt *indices) {
200*82b77998SStan Tomov   int ierr;
201*82b77998SStan Tomov   CeedElemRestriction_Magma *impl;
202*82b77998SStan Tomov 
203*82b77998SStan Tomov   if (mtype != CEED_MEM_HOST)
204*82b77998SStan Tomov     return CeedError(r->ceed, 1, "Only MemType = HOST supported");
205*82b77998SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
206*82b77998SStan Tomov   switch (cmode) {
207*82b77998SStan Tomov   case CEED_COPY_VALUES:
208*82b77998SStan Tomov     ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated);
209*82b77998SStan Tomov     CeedChk(ierr);
210*82b77998SStan Tomov     memcpy(impl->indices_allocated, indices,
211*82b77998SStan Tomov            r->nelem * r->elemsize * sizeof(indices[0]));
212*82b77998SStan Tomov     impl->indices = impl->indices_allocated;
213*82b77998SStan Tomov     break;
214*82b77998SStan Tomov   case CEED_OWN_POINTER:
215*82b77998SStan Tomov     impl->indices_allocated = (CeedInt *)indices;
216*82b77998SStan Tomov     impl->indices = impl->indices_allocated;
217*82b77998SStan Tomov     break;
218*82b77998SStan Tomov   case CEED_USE_POINTER:
219*82b77998SStan Tomov     impl->indices = indices;
220*82b77998SStan Tomov   }
221*82b77998SStan Tomov   r->data = impl;
222*82b77998SStan Tomov   r->Apply = CeedElemRestrictionApply_Magma;
223*82b77998SStan Tomov   r->Destroy = CeedElemRestrictionDestroy_Magma;
224*82b77998SStan Tomov   return 0;
225*82b77998SStan Tomov }
226*82b77998SStan Tomov 
227*82b77998SStan Tomov // Contracts on the middle index
228*82b77998SStan Tomov // NOTRANSPOSE: V_ajc = T_jb U_abc
229*82b77998SStan Tomov // TRANSPOSE:   V_ajc = T_bj U_abc
230*82b77998SStan Tomov // If Add != 0, "=" is replaced by "+="
231*82b77998SStan Tomov static int CeedTensorContract_Magma(Ceed ceed,
232*82b77998SStan Tomov                                   CeedInt A, CeedInt B, CeedInt C, CeedInt J,
233*82b77998SStan Tomov                                   const CeedScalar *t, CeedTransposeMode tmode,
234*82b77998SStan Tomov                                   const CeedInt Add,
235*82b77998SStan Tomov                                   const CeedScalar *u, CeedScalar *v) {
236*82b77998SStan Tomov   CeedInt tstride0 = B, tstride1 = 1;
237*82b77998SStan Tomov   if (tmode == CEED_TRANSPOSE) {
238*82b77998SStan Tomov     tstride0 = 1; tstride1 = J;
239*82b77998SStan Tomov   }
240*82b77998SStan Tomov 
241*82b77998SStan Tomov   for (CeedInt a=0; a<A; a++) {
242*82b77998SStan Tomov     for (CeedInt j=0; j<J; j++) {
243*82b77998SStan Tomov       if (!Add) {
244*82b77998SStan Tomov         for (CeedInt c=0; c<C; c++)
245*82b77998SStan Tomov           v[(a*J+j)*C+c] = 0;
246*82b77998SStan Tomov       }
247*82b77998SStan Tomov       for (CeedInt b=0; b<B; b++) {
248*82b77998SStan Tomov         for (CeedInt c=0; c<C; c++) {
249*82b77998SStan Tomov           v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c];
250*82b77998SStan Tomov         }
251*82b77998SStan Tomov       }
252*82b77998SStan Tomov     }
253*82b77998SStan Tomov   }
254*82b77998SStan Tomov   return 0;
255*82b77998SStan Tomov }
256*82b77998SStan Tomov 
257*82b77998SStan Tomov static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode,
258*82b77998SStan Tomov                               CeedEvalMode emode,
259*82b77998SStan Tomov                               const CeedScalar *u, CeedScalar *v) {
260*82b77998SStan Tomov   int ierr;
261*82b77998SStan Tomov   const CeedInt dim = basis->dim;
262*82b77998SStan Tomov   const CeedInt ndof = basis->ndof;
263*82b77998SStan Tomov   const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim);
264*82b77998SStan Tomov   const CeedInt add = (tmode == CEED_TRANSPOSE);
265*82b77998SStan Tomov 
266*82b77998SStan Tomov   if (tmode == CEED_TRANSPOSE) {
267*82b77998SStan Tomov     const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim);
268*82b77998SStan Tomov     for (CeedInt i = 0; i < vsize; i++)
269*82b77998SStan Tomov       v[i] = (CeedScalar) 0;
270*82b77998SStan Tomov   }
271*82b77998SStan Tomov   if (emode & CEED_EVAL_INTERP) {
272*82b77998SStan Tomov     CeedInt P = basis->P1d, Q = basis->Q1d;
273*82b77998SStan Tomov     if (tmode == CEED_TRANSPOSE) {
274*82b77998SStan Tomov       P = basis->Q1d; Q = basis->P1d;
275*82b77998SStan Tomov     }
276*82b77998SStan Tomov     CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
277*82b77998SStan Tomov     CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
278*82b77998SStan Tomov     for (CeedInt d=0; d<dim; d++) {
279*82b77998SStan Tomov       ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d,
280*82b77998SStan Tomov                                     tmode, add&&(d==dim-1),
281*82b77998SStan Tomov                                     d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
282*82b77998SStan Tomov       CeedChk(ierr);
283*82b77998SStan Tomov       pre /= P;
284*82b77998SStan Tomov       post *= Q;
285*82b77998SStan Tomov     }
286*82b77998SStan Tomov     if (tmode == CEED_NOTRANSPOSE) {
287*82b77998SStan Tomov       v += nqpt;
288*82b77998SStan Tomov     } else {
289*82b77998SStan Tomov       u += nqpt;
290*82b77998SStan Tomov     }
291*82b77998SStan Tomov   }
292*82b77998SStan Tomov   if (emode & CEED_EVAL_GRAD) {
293*82b77998SStan Tomov     CeedInt P = basis->P1d, Q = basis->Q1d;
294*82b77998SStan Tomov     // In CEED_NOTRANSPOSE mode:
295*82b77998SStan Tomov     // u is (P^dim x nc), column-major layout (nc = ndof)
296*82b77998SStan Tomov     // v is (Q^dim x nc x dim), column-major layout (nc = ndof)
297*82b77998SStan Tomov     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
298*82b77998SStan Tomov     if (tmode == CEED_TRANSPOSE) {
299*82b77998SStan Tomov       P = basis->Q1d, Q = basis->P1d;
300*82b77998SStan Tomov     }
301*82b77998SStan Tomov     CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
302*82b77998SStan Tomov     for (CeedInt p = 0; p < dim; p++) {
303*82b77998SStan Tomov       CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
304*82b77998SStan Tomov       for (CeedInt d=0; d<dim; d++) {
305*82b77998SStan Tomov         ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q,
306*82b77998SStan Tomov                                       (p==d)?basis->grad1d:basis->interp1d,
307*82b77998SStan Tomov                                       tmode, add&&(d==dim-1),
308*82b77998SStan Tomov                                       d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
309*82b77998SStan Tomov         CeedChk(ierr);
310*82b77998SStan Tomov         pre /= P;
311*82b77998SStan Tomov         post *= Q;
312*82b77998SStan Tomov       }
313*82b77998SStan Tomov       if (tmode == CEED_NOTRANSPOSE) {
314*82b77998SStan Tomov         v += nqpt;
315*82b77998SStan Tomov       } else {
316*82b77998SStan Tomov         u += nqpt;
317*82b77998SStan Tomov       }
318*82b77998SStan Tomov     }
319*82b77998SStan Tomov   }
320*82b77998SStan Tomov   if (emode & CEED_EVAL_WEIGHT) {
321*82b77998SStan Tomov     if (tmode == CEED_TRANSPOSE)
322*82b77998SStan Tomov       return CeedError(basis->ceed, 1,
323*82b77998SStan Tomov                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
324*82b77998SStan Tomov     CeedInt Q = basis->Q1d;
325*82b77998SStan Tomov     for (CeedInt d=0; d<dim; d++) {
326*82b77998SStan Tomov       CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d);
327*82b77998SStan Tomov       for (CeedInt i=0; i<pre; i++) {
328*82b77998SStan Tomov         for (CeedInt j=0; j<Q; j++) {
329*82b77998SStan Tomov           for (CeedInt k=0; k<post; k++) {
330*82b77998SStan Tomov             v[(i*Q + j)*post + k] = basis->qweight1d[j]
331*82b77998SStan Tomov                                     * (d == 0 ? 1 : v[(i*Q + j)*post + k]);
332*82b77998SStan Tomov           }
333*82b77998SStan Tomov         }
334*82b77998SStan Tomov       }
335*82b77998SStan Tomov     }
336*82b77998SStan Tomov   }
337*82b77998SStan Tomov   return 0;
338*82b77998SStan Tomov }
339*82b77998SStan Tomov 
340*82b77998SStan Tomov static int CeedBasisDestroy_Magma(CeedBasis basis) {
341*82b77998SStan Tomov   return 0;
342*82b77998SStan Tomov }
343*82b77998SStan Tomov 
344*82b77998SStan Tomov static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d,
345*82b77998SStan Tomov                                        CeedInt Q1d, const CeedScalar *interp1d,
346*82b77998SStan Tomov                                        const CeedScalar *grad1d,
347*82b77998SStan Tomov                                        const CeedScalar *qref1d,
348*82b77998SStan Tomov                                        const CeedScalar *qweight1d,
349*82b77998SStan Tomov                                        CeedBasis basis) {
350*82b77998SStan Tomov   basis->Apply = CeedBasisApply_Magma;
351*82b77998SStan Tomov   basis->Destroy = CeedBasisDestroy_Magma;
352*82b77998SStan Tomov   return 0;
353*82b77998SStan Tomov }
354*82b77998SStan Tomov 
355*82b77998SStan Tomov static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q,
356*82b77998SStan Tomov                                   const CeedScalar *const *u,
357*82b77998SStan Tomov                                   CeedScalar *const *v) {
358*82b77998SStan Tomov   int ierr;
359*82b77998SStan Tomov   ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr);
360*82b77998SStan Tomov   return 0;
361*82b77998SStan Tomov }
362*82b77998SStan Tomov 
363*82b77998SStan Tomov static int CeedQFunctionDestroy_Magma(CeedQFunction qf) {
364*82b77998SStan Tomov   return 0;
365*82b77998SStan Tomov }
366*82b77998SStan Tomov 
367*82b77998SStan Tomov static int CeedQFunctionCreate_Magma(CeedQFunction qf) {
368*82b77998SStan Tomov   qf->Apply = CeedQFunctionApply_Magma;
369*82b77998SStan Tomov   qf->Destroy = CeedQFunctionDestroy_Magma;
370*82b77998SStan Tomov   return 0;
371*82b77998SStan Tomov }
372*82b77998SStan Tomov 
373*82b77998SStan Tomov static int CeedOperatorDestroy_Magma(CeedOperator op) {
374*82b77998SStan Tomov   CeedOperator_Magma *impl = op->data;
375*82b77998SStan Tomov   int ierr;
376*82b77998SStan Tomov 
377*82b77998SStan Tomov   ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr);
378*82b77998SStan Tomov   ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr);
379*82b77998SStan Tomov   ierr = CeedFree(&op->data); CeedChk(ierr);
380*82b77998SStan Tomov   return 0;
381*82b77998SStan Tomov }
382*82b77998SStan Tomov 
383*82b77998SStan Tomov static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata,
384*82b77998SStan Tomov                                  CeedVector ustate,
385*82b77998SStan Tomov                                  CeedVector residual, CeedRequest *request) {
386*82b77998SStan Tomov   CeedOperator_Magma *impl = op->data;
387*82b77998SStan Tomov   CeedVector etmp;
388*82b77998SStan Tomov   CeedInt Q;
389*82b77998SStan Tomov   const CeedInt nc = op->basis->ndof, dim = op->basis->dim;
390*82b77998SStan Tomov   CeedScalar *Eu;
391*82b77998SStan Tomov   char *qd;
392*82b77998SStan Tomov   int ierr;
393*82b77998SStan Tomov   CeedTransposeMode lmode = CEED_NOTRANSPOSE;
394*82b77998SStan Tomov 
395*82b77998SStan Tomov   if (!impl->etmp) {
396*82b77998SStan Tomov     ierr = CeedVectorCreate(op->ceed,
397*82b77998SStan Tomov                             nc * op->Erestrict->nelem * op->Erestrict->elemsize,
398*82b77998SStan Tomov                             &impl->etmp); CeedChk(ierr);
399*82b77998SStan Tomov     // etmp is allocated when CeedVectorGetArray is called below
400*82b77998SStan Tomov   }
401*82b77998SStan Tomov   etmp = impl->etmp;
402*82b77998SStan Tomov   if (op->qf->inmode & ~CEED_EVAL_WEIGHT) {
403*82b77998SStan Tomov     ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE,
404*82b77998SStan Tomov                                     nc, lmode, ustate, etmp,
405*82b77998SStan Tomov                                     CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
406*82b77998SStan Tomov   }
407*82b77998SStan Tomov   ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
408*82b77998SStan Tomov   ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr);
409*82b77998SStan Tomov   ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd);
410*82b77998SStan Tomov   CeedChk(ierr);
411*82b77998SStan Tomov   for (CeedInt e=0; e<op->Erestrict->nelem; e++) {
412*82b77998SStan Tomov     CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0};
413*82b77998SStan Tomov     const CeedScalar *in[5] = {0,0,0,0,0};
414*82b77998SStan Tomov     // TODO: quadrature weights can be computed just once
415*82b77998SStan Tomov     ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode,
416*82b77998SStan Tomov                           &Eu[e*op->Erestrict->elemsize*nc], BEu);
417*82b77998SStan Tomov     CeedChk(ierr);
418*82b77998SStan Tomov     CeedScalar *u_ptr = BEu, *v_ptr = BEv;
419*82b77998SStan Tomov     if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; }
420*82b77998SStan Tomov     if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; }
421*82b77998SStan Tomov     if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; }
422*82b77998SStan Tomov     if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; }
423*82b77998SStan Tomov     if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; }
424*82b77998SStan Tomov     ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out);
425*82b77998SStan Tomov     CeedChk(ierr);
426*82b77998SStan Tomov     ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv,
427*82b77998SStan Tomov                           &Eu[e*op->Erestrict->elemsize*nc]);
428*82b77998SStan Tomov     CeedChk(ierr);
429*82b77998SStan Tomov   }
430*82b77998SStan Tomov   ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr);
431*82b77998SStan Tomov   if (residual) {
432*82b77998SStan Tomov     CeedScalar *res;
433*82b77998SStan Tomov     CeedVectorGetArray(residual, CEED_MEM_HOST, &res);
434*82b77998SStan Tomov     for (int i = 0; i < residual->length; i++)
435*82b77998SStan Tomov       res[i] = (CeedScalar)0;
436*82b77998SStan Tomov     ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE,
437*82b77998SStan Tomov                                     nc, lmode, etmp, residual,
438*82b77998SStan Tomov                                     CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
439*82b77998SStan Tomov   }
440*82b77998SStan Tomov   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
441*82b77998SStan Tomov     *request = NULL;
442*82b77998SStan Tomov   return 0;
443*82b77998SStan Tomov }
444*82b77998SStan Tomov 
445*82b77998SStan Tomov static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) {
446*82b77998SStan Tomov   CeedOperator_Magma *impl = op->data;
447*82b77998SStan Tomov   int ierr;
448*82b77998SStan Tomov 
449*82b77998SStan Tomov   if (!impl->qdata) {
450*82b77998SStan Tomov     CeedInt Q;
451*82b77998SStan Tomov     ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
452*82b77998SStan Tomov     ierr = CeedVectorCreate(op->ceed,
453*82b77998SStan Tomov                             op->Erestrict->nelem * Q
454*82b77998SStan Tomov                             * op->qf->qdatasize / sizeof(CeedScalar),
455*82b77998SStan Tomov                             &impl->qdata); CeedChk(ierr);
456*82b77998SStan Tomov   }
457*82b77998SStan Tomov   *qdata = impl->qdata;
458*82b77998SStan Tomov   return 0;
459*82b77998SStan Tomov }
460*82b77998SStan Tomov 
461*82b77998SStan Tomov static int CeedOperatorCreate_Magma(CeedOperator op) {
462*82b77998SStan Tomov   CeedOperator_Magma *impl;
463*82b77998SStan Tomov   int ierr;
464*82b77998SStan Tomov 
465*82b77998SStan Tomov   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
466*82b77998SStan Tomov   op->data = impl;
467*82b77998SStan Tomov   op->Destroy = CeedOperatorDestroy_Magma;
468*82b77998SStan Tomov   op->Apply = CeedOperatorApply_Magma;
469*82b77998SStan Tomov   op->GetQData = CeedOperatorGetQData_Magma;
470*82b77998SStan Tomov   return 0;
471*82b77998SStan Tomov }
472*82b77998SStan Tomov 
473*82b77998SStan Tomov // *****************************************************************************
474*82b77998SStan Tomov // * INIT
475*82b77998SStan Tomov // *****************************************************************************
476*82b77998SStan Tomov static int CeedInit_Magma(const char *resource, Ceed ceed) {
477*82b77998SStan Tomov   if (strcmp(resource, "/cpu/magma")
478*82b77998SStan Tomov       && strcmp(resource, "/gpu/magma"))
479*82b77998SStan Tomov       return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource);
480*82b77998SStan Tomov   ceed->VecCreate = CeedVectorCreate_Magma;
481*82b77998SStan Tomov   ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma;
482*82b77998SStan Tomov   ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma;
483*82b77998SStan Tomov   ceed->QFunctionCreate = CeedQFunctionCreate_Magma;
484*82b77998SStan Tomov   ceed->OperatorCreate = CeedOperatorCreate_Magma;
485*82b77998SStan Tomov   return 0;
486*82b77998SStan Tomov }
487*82b77998SStan Tomov 
488*82b77998SStan Tomov // *****************************************************************************
489*82b77998SStan Tomov // * REGISTER
490*82b77998SStan Tomov // *****************************************************************************
491*82b77998SStan Tomov __attribute__((constructor))
492*82b77998SStan Tomov static void Register(void) {
493*82b77998SStan Tomov   CeedRegister("/cpu/magma", CeedInit_Magma);
494*82b77998SStan Tomov   CeedRegister("/gpu/magma", CeedInit_Magma);
495*82b77998SStan Tomov }
496