xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.c (revision 1751b0a91f199612d28d31020853a60b0f7f7900)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed-impl.h>
18 #include <string.h>
19 #include "magma.h"
20 
21 typedef struct {
22     CeedScalar *array;
23     CeedScalar *darray;
24     int own_;
25 } CeedVector_Magma;
26 
27 typedef struct {
28     CeedInt *indices;
29     CeedInt *dindices;
30     int own_;
31 } CeedElemRestriction_Magma;
32 
33 typedef struct {
34     CeedVector etmp;
35     CeedVector qdata;
36 } CeedOperator_Magma;
37 
38 // *****************************************************************************
39 // * Initialize vector vec (after free mem) with values from array based on cmode
40 // *   CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal
41 // *                     to array, and data is copied (not store passed pointer)
42 // *   CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array
43 // *   CEED_USE_POINTER: vec->data->array = array (can modify; no ownership)
44 // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE
45 // *****************************************************************************
46 static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype,
47                                     CeedCopyMode cmode, CeedScalar *array) {
48     CeedVector_Magma *impl = vec->data;
49     int ierr;
50 
51     // If own data, free that "old" data, e.g., as it may be of different size
52     if (impl->own_){
53         magma_free( impl->darray );
54         magma_free_pinned( impl->array );
55         impl->darray = NULL;
56         impl->array  = NULL;
57         impl->own_ = 0;
58     }
59 
60     if (mtype == CEED_MEM_HOST) {
61         // memory is on the host; own_ = 0
62         switch (cmode) {
63         case CEED_COPY_VALUES:
64             ierr = magma_malloc( (void**)&impl->darray,
65                                  vec->length * sizeof(CeedScalar)); CeedChk(ierr);
66             ierr = magma_malloc_pinned( (void**)&impl->array,
67                                         vec->length * sizeof(CeedScalar)); CeedChk(ierr);
68             impl->own_ = 1;
69 
70             if (array)
71                 magma_setvector(vec->length, sizeof(array[0]),
72                                 array, 1, impl->darray, 1);
73             break;
74         case CEED_OWN_POINTER:
75             ierr = magma_malloc( (void**)&impl->darray,
76                                  vec->length * sizeof(CeedScalar)); CeedChk(ierr);
77             // TODO: possible problem here is if we are passed non-pinned memory;
78             //       (as we own it, lter in destroy, we use free for pinned memory).
79             impl->array = array;
80             impl->own_ = 1;
81 
82             if (array)
83                 magma_setvector(vec->length, sizeof(array[0]),
84                                 array, 1, impl->darray, 1);
85             break;
86         case CEED_USE_POINTER:
87             impl->darray = NULL;
88             impl->array  = array;
89         }
90     } else if (mtype == CEED_MEM_DEVICE) {
91         // memory is on the device; own = 0
92         switch (cmode) {
93         case CEED_COPY_VALUES:
94             ierr = magma_malloc( (void**)&impl->darray,
95                                 vec->length * sizeof(CeedScalar)); CeedChk(ierr);
96             ierr = magma_malloc_pinned( (void**)&impl->array,
97                                         vec->length * sizeof(CeedScalar)); CeedChk(ierr);
98             impl->own_ = 1;
99 
100             if (array)
101                 magma_copyvector(vec->length, sizeof(array[0]),
102                                  array, 1, impl->darray, 1);
103             break;
104         case CEED_OWN_POINTER:
105             impl->darray = array;
106             ierr = magma_malloc_pinned( (void**)&impl->array,
107                                         vec->length * sizeof(CeedScalar)); CeedChk(ierr);
108             impl->own_ = 1;
109 
110             break;
111         case CEED_USE_POINTER:
112             impl->darray = array;
113             impl->array  = NULL;
114         }
115 
116     } else
117         return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported");
118 
119     return 0;
120 }
121 
122 // *****************************************************************************
123 // * Give data pointer from vector vec to array (on HOST or DEVICE)
124 // *****************************************************************************
125 static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype,
126                                     CeedScalar **array) {
127     CeedVector_Magma *impl = vec->data;
128     int ierr;
129 
130     if (mtype == CEED_MEM_HOST) {
131         if (!impl->array){
132             // Allocate data if array is NULL
133             ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
134             CeedChk(ierr);
135         } else if (impl->own_) {
136             // data is owned so GPU had the most up-to-date version; copy it
137             magma_getvector(vec->length, sizeof(*array[0]),
138                             impl->darray, 1, impl->array, 1);
139         }
140         *array = impl->array;
141     } else if (mtype == CEED_MEM_DEVICE) {
142         if (!impl->darray){
143             // Allocate data if darray is NULL
144             ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
145             CeedChk(ierr);
146         }
147         *array = impl->darray;
148     } else
149         return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory");
150 
151     return 0;
152 }
153 
154 // *****************************************************************************
155 // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it
156 // *****************************************************************************
157 static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype,
158                                       const CeedScalar **array) {
159     CeedVector_Magma *impl = vec->data;
160     int ierr;
161 
162     if (mtype == CEED_MEM_HOST) {
163         if (!impl->array){
164             // Allocate data if array is NULL
165             ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
166             CeedChk(ierr);
167         } else if (impl->own_) {
168             // data is owned so GPU had the most up-to-date version; copy it
169             magma_getvector(vec->length, sizeof(*array[0]),
170                             impl->darray, 1, impl->array, 1);
171         }
172         *array = impl->array;
173     } else if (mtype == CEED_MEM_DEVICE) {
174         if (!impl->darray){
175             // Allocate data if darray is NULL
176             ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL);
177             CeedChk(ierr);
178         }
179         *array = impl->darray;
180     } else
181         return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory");
182 
183     return 0;
184 }
185 
186 // *****************************************************************************
187 // * There is no mtype here for array so it is not clear if we restore from HOST
188 // * memory or from DEVICE memory. We assume that it is CPU memory because if
189 // * it was GPU memory we would not call this routine at all.
190 // * Restore vector vec with values from array, where array received its values
191 // * from vec and possibly modified them.
192 // *****************************************************************************
193 static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) {
194     CeedVector_Magma *impl = vec->data;
195 
196     if (impl->darray!=NULL)
197         magma_setvector(vec->length, sizeof(*array[0]),
198                         *array, 1, impl->darray, 1);
199 
200     *array = NULL;
201     return 0;
202 }
203 
204 // *****************************************************************************
205 // * There is no mtype here for array so it is not clear if we restore from HOST
206 // * memory or from DEVICE memory. We assume that it is CPU memory because if
207 // * it was GPU memory we would not call this routine at all.
208 // * Restore vector vec with values from array, where array received its values
209 // * from vec to only read them; in this case vec may have been modified meanwhile
210 // * and needs to be restored here.
211 // *****************************************************************************
212 static int CeedVectorRestoreArrayRead_Magma(CeedVector vec,
213                                             const CeedScalar **array) {
214     CeedVector_Magma *impl = vec->data;
215 
216     if (impl->darray!=NULL)
217         magma_setvector(vec->length, sizeof(*array[0]),
218                         *array, 1, impl->darray, 1);
219 
220     *array = NULL;
221     return 0;
222 }
223 
224 static int CeedVectorDestroy_Magma(CeedVector vec) {
225     CeedVector_Magma *impl = vec->data;
226     int ierr;
227 
228     // Free if we own the data
229     if (impl->own_){
230         ierr = magma_free_pinned(impl->array); CeedChk(ierr);
231         ierr = magma_free(impl->darray);       CeedChk(ierr);
232     }
233     ierr = CeedFree(&vec->data); CeedChk(ierr);
234     return 0;
235 }
236 
237 // *****************************************************************************
238 // * Create vector vec of size n
239 // *****************************************************************************
240 static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) {
241     CeedVector_Magma *impl;
242     int ierr;
243 
244     vec->SetArray = CeedVectorSetArray_Magma;
245     vec->GetArray = CeedVectorGetArray_Magma;
246     vec->GetArrayRead = CeedVectorGetArrayRead_Magma;
247     vec->RestoreArray = CeedVectorRestoreArray_Magma;
248     vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma;
249     vec->Destroy = CeedVectorDestroy_Magma;
250     ierr = CeedCalloc(1,&impl); CeedChk(ierr);
251     impl->darray = NULL;
252     impl->array  = NULL;
253     impl->own_ = 0;
254     vec->data = impl;
255     return 0;
256 }
257 
258 // *****************************************************************************
259 // * Apply restriction operator r to u: v = r(rmode) u
260 // *****************************************************************************
261 static int CeedElemRestrictionApply_Magma(CeedElemRestriction r,
262                                         CeedTransposeMode tmode, CeedInt ncomp,
263                                         CeedTransposeMode lmode, CeedVector u,
264                                         CeedVector v, CeedRequest *request) {
265     CeedElemRestriction_Magma *impl = r->data;
266     int ierr;
267     const CeedScalar *uu;
268     CeedScalar *vv;
269     CeedInt esize = r->nelem*r->elemsize;
270     //printf("HELLOOOOOOOOO=======================\n");
271 
272     ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
273     ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
274     if (tmode == CEED_NOTRANSPOSE) {
275         // Perform: v = r * u
276         if (ncomp == 1) {
277             for (CeedInt i=0; i<esize; i++) vv[i] = uu[impl->indices[i]];
278 
279             /*
280             // Works - in t05 x has to be with CEED_COPY_VALUES
281             CeedVector_Magma *uimpl = u->data, *vimpl = v->data;
282             uu = uimpl->darray;
283             vv = vimpl->darray;
284             CeedInt *indices;// = (int *)impl->indices;
285             magma_malloc( (void**)&indices, esize * sizeof(CeedInt));
286             magma_setvector(esize, sizeof(CeedInt),
287                             (int *)impl->indices, 1, indices, 1);
288             magma_template<<i=0:esize>>(const CeedScalar *uu, CeedScalar *vv, CeedInt *indices)
289               {
290                  vv[i] = uu[indices[i]];
291               }
292 
293             // printf("HELLOOOOOOOOO....... size = %d\n", esize);
294             //
295 
296             if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
297                 *request = NULL;
298             return 0;
299             */
300 
301         } else {
302             //printf("HELLOOOOOOOOO-------------\n");
303             // vv is (elemsize x ncomp x nelem), column-major
304             if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
305                 for (CeedInt e = 0; e < r->nelem; e++)
306                     for (CeedInt d = 0; d < ncomp; d++)
307                         for (CeedInt i=0; i<r->elemsize; i++) {
308                             vv[i+r->elemsize*(d+ncomp*e)] =
309                                 uu[impl->indices[i+r->elemsize*e]+r->ndof*d];
310                         }
311             } else { // u is (ncomp x ndof), column-major
312                 for (CeedInt e = 0; e < r->nelem; e++)
313                     for (CeedInt d = 0; d < ncomp; d++)
314                         for (CeedInt i=0; i<r->elemsize; i++) {
315                             vv[i+r->elemsize*(d+ncomp*e)] =
316                                 uu[d+ncomp*impl->indices[i+r->elemsize*e]];
317                         }
318             }
319         }
320     } else {
321         // Note: in transpose mode, we perform: v += r^t * u
322         if (ncomp == 1) {
323             for (CeedInt i=0; i<esize; i++) vv[impl->indices[i]] += uu[i];
324         } else {
325             //printf("HELLOOOOOOOOO+++++++++++++\n");
326             // u is (elemsize x ncomp x nelem)
327             if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
328                 for (CeedInt e = 0; e < r->nelem; e++)
329                     for (CeedInt d = 0; d < ncomp; d++)
330                         for (CeedInt i=0; i<r->elemsize; i++) {
331                             vv[impl->indices[i+r->elemsize*e]+r->ndof*d] +=
332                                 uu[i+r->elemsize*(d+e*ncomp)];
333                         }
334             } else { // vv is (ncomp x ndof), column-major
335                 for (CeedInt e = 0; e < r->nelem; e++)
336                     for (CeedInt d = 0; d < ncomp; d++)
337                         for (CeedInt i=0; i<r->elemsize; i++) {
338                             vv[d+ncomp*impl->indices[i+r->elemsize*e]] +=
339                                 uu[i+r->elemsize*(d+e*ncomp)];
340                         }
341             }
342         }
343     }
344     ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
345     ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
346     if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
347         *request = NULL;
348     return 0;
349 }
350 
351 static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) {
352   CeedElemRestriction_Magma *impl = r->data;
353   int ierr;
354 
355   // Free if we own the data
356   if (impl->own_){
357       ierr = magma_free_pinned(impl->indices); CeedChk(ierr);
358       ierr = magma_free(impl->dindices);       CeedChk(ierr);
359   }
360   ierr = CeedFree(&r->data); CeedChk(ierr);
361   return 0;
362 }
363 
364 static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r,
365                                            CeedMemType mtype,
366                                            CeedCopyMode cmode,
367                                            const CeedInt *indices) {
368     int ierr, size = r->nelem*r->elemsize;
369     CeedElemRestriction_Magma *impl;
370 
371     // Allocate memory for the MAGMA Restricton and initializa pointers to NULL
372     ierr = CeedCalloc(1,&impl); CeedChk(ierr);
373     impl->dindices = NULL;
374     impl->indices  = NULL;
375     impl->own_ = 0;
376 
377     if (mtype == CEED_MEM_HOST) {
378         // memory is on the host; own_ = 0
379         switch (cmode) {
380         case CEED_COPY_VALUES:
381             ierr = magma_malloc( (void**)&impl->dindices,
382                                   size * sizeof(CeedInt)); CeedChk(ierr);
383             ierr = magma_malloc_pinned( (void**)&impl->indices,
384                                         size * sizeof(CeedInt)); CeedChk(ierr);
385             impl->own_ = 1;
386 
387             if (indices)
388                 magma_setvector(size, sizeof(CeedInt),
389                                 indices, 1, impl->dindices, 1);
390             break;
391         case CEED_OWN_POINTER:
392             ierr = magma_malloc( (void**)&impl->dindices,
393                                  size * sizeof(CeedInt)); CeedChk(ierr);
394             // TODO: possible problem here is if we are passed non-pinned memory;
395             //       (as we own it, lter in destroy, we use free for pinned memory).
396             impl->indices = indices;
397             impl->own_ = 1;
398 
399             if (indices)
400                 magma_setvector(size, sizeof(CeedInt),
401                                 indices, 1, impl->dindices, 1);
402             break;
403         case CEED_USE_POINTER:
404             impl->dindices = NULL;
405             impl->indices  = indices;
406         }
407     } else if (mtype == CEED_MEM_DEVICE) {
408         // memory is on the device; own = 0
409         switch (cmode) {
410         case CEED_COPY_VALUES:
411             ierr = magma_malloc( (void**)&impl->dindices,
412                                  size * sizeof(CeedInt)); CeedChk(ierr);
413             ierr = magma_malloc_pinned( (void**)&impl->indices,
414                                         size * sizeof(CeedInt)); CeedChk(ierr);
415             impl->own_ = 1;
416 
417             if (indices)
418                 magma_copyvector(size, sizeof(CeedInt),
419                                  indices, 1, impl->dindices, 1);
420             break;
421         case CEED_OWN_POINTER:
422             impl->dindices = indices;
423             ierr = magma_malloc_pinned( (void**)&impl->indices,
424                                         size * sizeof(CeedInt)); CeedChk(ierr);
425             impl->own_ = 1;
426 
427             break;
428         case CEED_USE_POINTER:
429             impl->dindices = indices;
430             impl->indices  = NULL;
431         }
432 
433     } else
434         return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported");
435 
436     r->data    = impl;
437     r->Apply   = CeedElemRestrictionApply_Magma;
438     r->Destroy = CeedElemRestrictionDestroy_Magma;
439 
440     return 0;
441 }
442 
443 // Contracts on the middle index
444 // NOTRANSPOSE: V_ajc = T_jb U_abc
445 // TRANSPOSE:   V_ajc = T_bj U_abc
446 // If Add != 0, "=" is replaced by "+="
447 static int CeedTensorContract_Magma(Ceed ceed,
448                                     CeedInt A, CeedInt B, CeedInt C, CeedInt J,
449                                     const CeedScalar *t, CeedTransposeMode tmode,
450                                     const CeedInt Add,
451                                     const CeedScalar *u, CeedScalar *v) {
452     CeedInt tstride0 = B, tstride1 = 1;
453     if (tmode == CEED_TRANSPOSE) {
454         tstride0 = 1; tstride1 = J;
455     }
456 
457     for (CeedInt a=0; a<A; a++) {
458         for (CeedInt j=0; j<J; j++) {
459             if (!Add) {
460                 for (CeedInt c=0; c<C; c++)
461                     v[(a*J+j)*C+c] = 0;
462             }
463             for (CeedInt b=0; b<B; b++) {
464                 for (CeedInt c=0; c<C; c++) {
465                     v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c];
466                 }
467             }
468         }
469     }
470     return 0;
471 }
472 
473 static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode,
474                                 CeedEvalMode emode,
475                                 const CeedScalar *u, CeedScalar *v) {
476     int ierr;
477     const CeedInt dim = basis->dim;
478     const CeedInt ndof = basis->ndof;
479     const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim);
480     const CeedInt add = (tmode == CEED_TRANSPOSE);
481 
482     if (tmode == CEED_TRANSPOSE) {
483         const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim);
484         for (CeedInt i = 0; i < vsize; i++)
485             v[i] = (CeedScalar) 0;
486     }
487     if (emode & CEED_EVAL_INTERP) {
488         CeedInt P = basis->P1d, Q = basis->Q1d;
489         if (tmode == CEED_TRANSPOSE) {
490             P = basis->Q1d; Q = basis->P1d;
491         }
492         CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
493         CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
494         for (CeedInt d=0; d<dim; d++) {
495             ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d,
496                                             tmode, add&&(d==dim-1),
497                                             d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
498             CeedChk(ierr);
499             pre /= P;
500             post *= Q;
501         }
502         if (tmode == CEED_NOTRANSPOSE) {
503             v += nqpt;
504         } else {
505             u += nqpt;
506         }
507     }
508     if (emode & CEED_EVAL_GRAD) {
509         CeedInt P = basis->P1d, Q = basis->Q1d;
510         // In CEED_NOTRANSPOSE mode:
511         // u is (P^dim x nc), column-major layout (nc = ndof)
512         // v is (Q^dim x nc x dim), column-major layout (nc = ndof)
513         // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
514         if (tmode == CEED_TRANSPOSE) {
515             P = basis->Q1d, Q = basis->P1d;
516         }
517         CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
518         for (CeedInt p = 0; p < dim; p++) {
519             CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
520             for (CeedInt d=0; d<dim; d++) {
521                 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q,
522                                                 (p==d)?basis->grad1d:basis->interp1d,
523                                                 tmode, add&&(d==dim-1),
524                                                 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
525                 CeedChk(ierr);
526                 pre /= P;
527                 post *= Q;
528             }
529             if (tmode == CEED_NOTRANSPOSE) {
530                 v += nqpt;
531             } else {
532                 u += nqpt;
533             }
534         }
535     }
536     if (emode & CEED_EVAL_WEIGHT) {
537         if (tmode == CEED_TRANSPOSE)
538             return CeedError(basis->ceed, 1,
539                              "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
540         CeedInt Q = basis->Q1d;
541         for (CeedInt d=0; d<dim; d++) {
542             CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d);
543             for (CeedInt i=0; i<pre; i++) {
544                 for (CeedInt j=0; j<Q; j++) {
545                     for (CeedInt k=0; k<post; k++) {
546                         v[(i*Q + j)*post + k] = basis->qweight1d[j]
547                             * (d == 0 ? 1 : v[(i*Q + j)*post + k]);
548                     }
549                 }
550             }
551         }
552     }
553     return 0;
554 }
555 
556 static int CeedBasisDestroy_Magma(CeedBasis basis) {
557     return 0;
558 }
559 
560 static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d,
561                                          CeedInt Q1d, const CeedScalar *interp1d,
562                                          const CeedScalar *grad1d,
563                                          const CeedScalar *qref1d,
564                                          const CeedScalar *qweight1d,
565                                          CeedBasis basis) {
566     basis->Apply = CeedBasisApply_Magma;
567     basis->Destroy = CeedBasisDestroy_Magma;
568     return 0;
569 }
570 
571 static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q,
572                                     const CeedScalar *const *u,
573                                     CeedScalar *const *v) {
574     int ierr;
575     ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr);
576     return 0;
577 }
578 
579 static int CeedQFunctionDestroy_Magma(CeedQFunction qf) {
580     return 0;
581 }
582 
583 static int CeedQFunctionCreate_Magma(CeedQFunction qf) {
584     qf->Apply = CeedQFunctionApply_Magma;
585     qf->Destroy = CeedQFunctionDestroy_Magma;
586     return 0;
587 }
588 
589 static int CeedOperatorDestroy_Magma(CeedOperator op) {
590     CeedOperator_Magma *impl = op->data;
591     int ierr;
592 
593     ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr);
594     ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr);
595     ierr = CeedFree(&op->data); CeedChk(ierr);
596     return 0;
597 }
598 
599 static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata,
600                                    CeedVector ustate,
601                                    CeedVector residual, CeedRequest *request) {
602     CeedOperator_Magma *impl = op->data;
603     CeedVector etmp;
604     CeedInt Q;
605     const CeedInt nc = op->basis->ndof, dim = op->basis->dim;
606     CeedScalar *Eu;
607     char *qd;
608     int ierr;
609     CeedTransposeMode lmode = CEED_NOTRANSPOSE;
610 
611     if (!impl->etmp) {
612         ierr = CeedVectorCreate(op->ceed,
613                                 nc * op->Erestrict->nelem * op->Erestrict->elemsize,
614                                 &impl->etmp); CeedChk(ierr);
615         // etmp is allocated when CeedVectorGetArray is called below
616     }
617     etmp = impl->etmp;
618     if (op->qf->inmode & ~CEED_EVAL_WEIGHT) {
619         ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE,
620                                         nc, lmode, ustate, etmp,
621                                         CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
622     }
623     ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
624     ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr);
625     ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd);
626     CeedChk(ierr);
627     for (CeedInt e=0; e<op->Erestrict->nelem; e++) {
628         CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0};
629         const CeedScalar *in[5] = {0,0,0,0,0};
630         // TODO: quadrature weights can be computed just once
631         ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode,
632                               &Eu[e*op->Erestrict->elemsize*nc], BEu);
633         CeedChk(ierr);
634         CeedScalar *u_ptr = BEu, *v_ptr = BEv;
635         if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; }
636         if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; }
637         if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; }
638         if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; }
639         if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; }
640         ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out);
641         CeedChk(ierr);
642         ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv,
643                               &Eu[e*op->Erestrict->elemsize*nc]);
644         CeedChk(ierr);
645     }
646     ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr);
647     if (residual) {
648         CeedScalar *res;
649         CeedVectorGetArray(residual, CEED_MEM_HOST, &res);
650         for (int i = 0; i < residual->length; i++)
651             res[i] = (CeedScalar)0;
652         ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE,
653                                         nc, lmode, etmp, residual,
654                                         CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
655     }
656     if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
657         *request = NULL;
658     return 0;
659 }
660 
661 static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) {
662     CeedOperator_Magma *impl = op->data;
663     int ierr;
664 
665     if (!impl->qdata) {
666         CeedInt Q;
667         ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
668         ierr = CeedVectorCreate(op->ceed,
669                                 op->Erestrict->nelem * Q
670                                 * op->qf->qdatasize / sizeof(CeedScalar),
671                                 &impl->qdata); CeedChk(ierr);
672     }
673     *qdata = impl->qdata;
674     return 0;
675 }
676 
677 static int CeedOperatorCreate_Magma(CeedOperator op) {
678     CeedOperator_Magma *impl;
679     int ierr;
680 
681     ierr = CeedCalloc(1, &impl); CeedChk(ierr);
682     op->data = impl;
683     op->Destroy = CeedOperatorDestroy_Magma;
684     op->Apply = CeedOperatorApply_Magma;
685     op->GetQData = CeedOperatorGetQData_Magma;
686     return 0;
687 }
688 
689 // *****************************************************************************
690 // * INIT
691 // *****************************************************************************
692 static int CeedInit_Magma(const char *resource, Ceed ceed) {
693     if (strcmp(resource, "/cpu/magma")
694         && strcmp(resource, "/gpu/magma"))
695         return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource);
696 
697     magma_init();
698     //magma_print_environment();
699 
700     ceed->VecCreate = CeedVectorCreate_Magma;
701     ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma;
702     ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma;
703     ceed->QFunctionCreate = CeedQFunctionCreate_Magma;
704     ceed->OperatorCreate = CeedOperatorCreate_Magma;
705     return 0;
706 }
707 
708 // *****************************************************************************
709 // * REGISTER
710 // *****************************************************************************
711 __attribute__((constructor))
712 static void Register(void) {
713     CeedRegister("/cpu/magma", CeedInit_Magma);
714     CeedRegister("/gpu/magma", CeedInit_Magma);
715 }
716