xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma-basis.c (revision 7f5b97318525f14c2c6b9e4eb41d4a37b3a25b45)
1*7f5b9731SStan Tomov // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2*7f5b9731SStan Tomov // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3*7f5b9731SStan Tomov // All Rights reserved. See files LICENSE and NOTICE for details.
4*7f5b9731SStan Tomov //
5*7f5b9731SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software
6*7f5b9731SStan Tomov // libraries and APIs for efficient high-order finite element and spectral
7*7f5b9731SStan Tomov // element discretizations for exascale applications. For more information and
8*7f5b9731SStan Tomov // source code availability see http://github.com/ceed.
9*7f5b9731SStan Tomov //
10*7f5b9731SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11*7f5b9731SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office
12*7f5b9731SStan Tomov // of Science and the National Nuclear Security Administration) responsible for
13*7f5b9731SStan Tomov // the planning and preparation of a capable exascale ecosystem, including
14*7f5b9731SStan Tomov // software, applications, hardware, advanced system engineering and early
15*7f5b9731SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative.
16*7f5b9731SStan Tomov 
17*7f5b9731SStan Tomov #include "ceed-magma.h"
18*7f5b9731SStan Tomov 
19*7f5b9731SStan Tomov #ifdef __cplusplus
20*7f5b9731SStan Tomov CEED_INTERN "C"
21*7f5b9731SStan Tomov #endif
22*7f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
23*7f5b9731SStan Tomov 			 CeedTransposeMode tmode, CeedEvalMode emode,
24*7f5b9731SStan Tomov 			 CeedVector U, CeedVector V)
25*7f5b9731SStan Tomov {
26*7f5b9731SStan Tomov   int ierr;
27*7f5b9731SStan Tomov   Ceed ceed;
28*7f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
29*7f5b9731SStan Tomov   CeedInt dim, ncomp, ndof, nqpt;
30*7f5b9731SStan Tomov   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
31*7f5b9731SStan Tomov   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
32*7f5b9731SStan Tomov   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr);
33*7f5b9731SStan Tomov   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr);
34*7f5b9731SStan Tomov   const CeedScalar *u;
35*7f5b9731SStan Tomov   CeedScalar *v;
36*7f5b9731SStan Tomov   if (U) {
37*7f5b9731SStan Tomov     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChk(ierr);
38*7f5b9731SStan Tomov   } else if (emode != CEED_EVAL_WEIGHT) {
39*7f5b9731SStan Tomov     // LCOV_EXCL_START
40*7f5b9731SStan Tomov     return CeedError(ceed, 1,
41*7f5b9731SStan Tomov                      "An input vector is required for this CeedEvalMode");
42*7f5b9731SStan Tomov     // LCOV_EXCL_STOP
43*7f5b9731SStan Tomov   }
44*7f5b9731SStan Tomov   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChk(ierr);
45*7f5b9731SStan Tomov 
46*7f5b9731SStan Tomov   CeedBasis_Magma *impl;
47*7f5b9731SStan Tomov   ierr = CeedBasisGetData(basis, (void*)&impl); CeedChk(ierr);
48*7f5b9731SStan Tomov 
49*7f5b9731SStan Tomov   CeedInt P1d, Q1d;
50*7f5b9731SStan Tomov   ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr);
51*7f5b9731SStan Tomov   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr);
52*7f5b9731SStan Tomov 
53*7f5b9731SStan Tomov   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d",
54*7f5b9731SStan Tomov             ncomp*CeedIntPow(P1d, dim), ncomp);
55*7f5b9731SStan Tomov 
56*7f5b9731SStan Tomov   if (tmode == CEED_TRANSPOSE) {
57*7f5b9731SStan Tomov      CeedInt length;
58*7f5b9731SStan Tomov      ierr = CeedVectorGetLength(V, &length);
59*7f5b9731SStan Tomov      magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length );
60*7f5b9731SStan Tomov   }
61*7f5b9731SStan Tomov   if (emode & CEED_EVAL_INTERP) {
62*7f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
63*7f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
64*7f5b9731SStan Tomov       P = Q1d; Q = P1d;
65*7f5b9731SStan Tomov     }
66*7f5b9731SStan Tomov 
67*7f5b9731SStan Tomov     // Define element sizes for dofs/quad
68*7f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
69*7f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
70*7f5b9731SStan Tomov 
71*7f5b9731SStan Tomov      // E-vector ordering -------------- Q-vector ordering
72*7f5b9731SStan Tomov      //  elem                            component
73*7f5b9731SStan Tomov      //    component                        elem
74*7f5b9731SStan Tomov      //       node                            node
75*7f5b9731SStan Tomov 
76*7f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
77*7f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
78*7f5b9731SStan Tomov 
79*7f5b9731SStan Tomov     // Element strides
80*7f5b9731SStan Tomov     CeedInt u_elstride = ncomp * eldofssize;
81*7f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
82*7f5b9731SStan Tomov     // Component strides
83*7f5b9731SStan Tomov     CeedInt u_compstride = eldofssize;
84*7f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
85*7f5b9731SStan Tomov 
86*7f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
87*7f5b9731SStan Tomov     if(tmode == CEED_TRANSPOSE) {
88*7f5b9731SStan Tomov         // Input (u) is Q-vector, output (v) is E-vector
89*7f5b9731SStan Tomov         // Element strides
90*7f5b9731SStan Tomov         v_elstride = ncomp * eldofssize;
91*7f5b9731SStan Tomov         u_elstride = elquadsize;
92*7f5b9731SStan Tomov         // Component strides
93*7f5b9731SStan Tomov         v_compstride = eldofssize;
94*7f5b9731SStan Tomov         u_compstride = nelem * elquadsize;
95*7f5b9731SStan Tomov     }
96*7f5b9731SStan Tomov 
97*7f5b9731SStan Tomov     // Loop through components and apply batch over elements
98*7f5b9731SStan Tomov     for (CeedInt comp_ctr = 0; comp_ctr < ncomp; comp_ctr++){
99*7f5b9731SStan Tomov            magmablas_dbasis_apply_batched_eval_interp(P, Q, dim, ncomp,
100*7f5b9731SStan Tomov 	   				       impl->dinterp1d, tmode,
101*7f5b9731SStan Tomov    					       u + u_compstride * comp_ctr, u_elstride,
102*7f5b9731SStan Tomov 					       v + v_compstride * comp_ctr, v_elstride,
103*7f5b9731SStan Tomov 					       nelem);
104*7f5b9731SStan Tomov      }
105*7f5b9731SStan Tomov 
106*7f5b9731SStan Tomov   }
107*7f5b9731SStan Tomov   if (emode & CEED_EVAL_GRAD) {
108*7f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
109*7f5b9731SStan Tomov     // In CEED_NOTRANSPOSE mode:
110*7f5b9731SStan Tomov     // u is (P^dim x nc), column-major layout (nc = ncomp)
111*7f5b9731SStan Tomov     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
112*7f5b9731SStan Tomov     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
113*7f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
114*7f5b9731SStan Tomov       P = Q1d, Q = P1d;
115*7f5b9731SStan Tomov     }
116*7f5b9731SStan Tomov 
117*7f5b9731SStan Tomov     // Define element sizes for dofs/quad
118*7f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
119*7f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
120*7f5b9731SStan Tomov 
121*7f5b9731SStan Tomov      // E-vector ordering -------------- Q-vector ordering
122*7f5b9731SStan Tomov      //                                  dim
123*7f5b9731SStan Tomov      //  elem                             component
124*7f5b9731SStan Tomov      //    component                        elem
125*7f5b9731SStan Tomov      //       node                            node
126*7f5b9731SStan Tomov 
127*7f5b9731SStan Tomov 
128*7f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
129*7f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
130*7f5b9731SStan Tomov 
131*7f5b9731SStan Tomov     // Element strides
132*7f5b9731SStan Tomov     CeedInt u_elstride = ncomp * eldofssize;
133*7f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
134*7f5b9731SStan Tomov     // Component strides
135*7f5b9731SStan Tomov     CeedInt u_compstride = eldofssize;
136*7f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
137*7f5b9731SStan Tomov     // Dimension strides
138*7f5b9731SStan Tomov     CeedInt u_dimstride = 0;
139*7f5b9731SStan Tomov     CeedInt v_dimstride = nelem * elquadsize * ncomp;
140*7f5b9731SStan Tomov 
141*7f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
142*7f5b9731SStan Tomov     if(tmode == CEED_TRANSPOSE) {
143*7f5b9731SStan Tomov         // Input (u) is Q-vector, output (v) is E-vector
144*7f5b9731SStan Tomov         // Element strides
145*7f5b9731SStan Tomov         v_elstride = ncomp * eldofssize;
146*7f5b9731SStan Tomov         u_elstride = elquadsize;
147*7f5b9731SStan Tomov         // Component strides
148*7f5b9731SStan Tomov         v_compstride = eldofssize;
149*7f5b9731SStan Tomov         u_compstride = nelem * elquadsize;
150*7f5b9731SStan Tomov         // Dimension strides
151*7f5b9731SStan Tomov         v_dimstride = 0;
152*7f5b9731SStan Tomov         u_dimstride = nelem * elquadsize * ncomp;
153*7f5b9731SStan Tomov 
154*7f5b9731SStan Tomov       }
155*7f5b9731SStan Tomov 
156*7f5b9731SStan Tomov      // Loop through grad dimensions and components, batch call over elements
157*7f5b9731SStan Tomov      for(CeedInt dim_ctr = 0; dim_ctr < dim; dim_ctr++){
158*7f5b9731SStan Tomov         for (CeedInt comp_ctr = 0; comp_ctr < ncomp; comp_ctr++){
159*7f5b9731SStan Tomov              magmablas_dbasis_apply_batched_eval_grad(P, Q, dim, ncomp, nqpt,
160*7f5b9731SStan Tomov 	    				     impl->dinterp1d, impl->dgrad1d, tmode,
161*7f5b9731SStan Tomov    					     u + dim_ctr * u_dimstride + u_compstride * comp_ctr,
162*7f5b9731SStan Tomov                                              u_elstride,
163*7f5b9731SStan Tomov 					     v + dim_ctr * v_dimstride + v_compstride * comp_ctr,
164*7f5b9731SStan Tomov                                              v_elstride,
165*7f5b9731SStan Tomov 					     nelem, dim_ctr);
166*7f5b9731SStan Tomov         }
167*7f5b9731SStan Tomov      }
168*7f5b9731SStan Tomov    }
169*7f5b9731SStan Tomov   if (emode & CEED_EVAL_WEIGHT) {
170*7f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE)
171*7f5b9731SStan Tomov       // LCOV_EXCL_START
172*7f5b9731SStan Tomov       return CeedError(ceed, 1,
173*7f5b9731SStan Tomov                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
174*7f5b9731SStan Tomov     // LCOV_EXCL_STOP
175*7f5b9731SStan Tomov     CeedInt Q = Q1d;
176*7f5b9731SStan Tomov     int eldofssize = CeedIntPow(Q, dim);
177*7f5b9731SStan Tomov     magmablas_dbasis_apply_batched_eval_weight(Q, dim, impl->dqweight1d,
178*7f5b9731SStan Tomov 					       v, eldofssize,
179*7f5b9731SStan Tomov 					       nelem);
180*7f5b9731SStan Tomov   }
181*7f5b9731SStan Tomov 
182*7f5b9731SStan Tomov   if(emode!=CEED_EVAL_WEIGHT) {
183*7f5b9731SStan Tomov     ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr);
184*7f5b9731SStan Tomov   }
185*7f5b9731SStan Tomov   ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr);
186*7f5b9731SStan Tomov   return 0;
187*7f5b9731SStan Tomov }
188*7f5b9731SStan Tomov 
189*7f5b9731SStan Tomov #ifdef __cplusplus
190*7f5b9731SStan Tomov CEED_INTERN "C"
191*7f5b9731SStan Tomov #endif
192*7f5b9731SStan Tomov int CeedBasisDestroy_Magma(CeedBasis basis)
193*7f5b9731SStan Tomov {
194*7f5b9731SStan Tomov   int ierr;
195*7f5b9731SStan Tomov   CeedBasis_Magma *impl;
196*7f5b9731SStan Tomov   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
197*7f5b9731SStan Tomov 
198*7f5b9731SStan Tomov   ierr = magma_free(impl->dqref1d); CeedChk(ierr);
199*7f5b9731SStan Tomov   ierr = magma_free(impl->dinterp1d); CeedChk(ierr);
200*7f5b9731SStan Tomov   ierr = magma_free(impl->dgrad1d); CeedChk(ierr);
201*7f5b9731SStan Tomov   ierr = magma_free(impl->dqweight1d); CeedChk(ierr);
202*7f5b9731SStan Tomov 
203*7f5b9731SStan Tomov   ierr = CeedFree(&impl); CeedChk(ierr);
204*7f5b9731SStan Tomov 
205*7f5b9731SStan Tomov   return 0;
206*7f5b9731SStan Tomov }
207*7f5b9731SStan Tomov 
208*7f5b9731SStan Tomov #ifdef __cplusplus
209*7f5b9731SStan Tomov CEED_INTERN "C"
210*7f5b9731SStan Tomov #endif
211*7f5b9731SStan Tomov int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
212*7f5b9731SStan Tomov     CeedInt Q1d, const CeedScalar *interp1d,
213*7f5b9731SStan Tomov     const CeedScalar *grad1d,
214*7f5b9731SStan Tomov     const CeedScalar *qref1d,
215*7f5b9731SStan Tomov     const CeedScalar *qweight1d,
216*7f5b9731SStan Tomov     CeedBasis basis)
217*7f5b9731SStan Tomov {
218*7f5b9731SStan Tomov   int ierr;
219*7f5b9731SStan Tomov   CeedBasis_Magma *impl;
220*7f5b9731SStan Tomov   Ceed ceed;
221*7f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
222*7f5b9731SStan Tomov 
223*7f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
224*7f5b9731SStan Tomov                                 CeedBasisApply_Magma); CeedChk(ierr);
225*7f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
226*7f5b9731SStan Tomov                                 CeedBasisDestroy_Magma); CeedChk(ierr);
227*7f5b9731SStan Tomov 
228*7f5b9731SStan Tomov   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
229*7f5b9731SStan Tomov   ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr);
230*7f5b9731SStan Tomov 
231*7f5b9731SStan Tomov   // Copy qref1d to the GPU
232*7f5b9731SStan Tomov   ierr = magma_malloc((void**)&impl->dqref1d, Q1d*sizeof(qref1d[0]));
233*7f5b9731SStan Tomov   CeedChk(ierr);
234*7f5b9731SStan Tomov   magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1);
235*7f5b9731SStan Tomov 
236*7f5b9731SStan Tomov   // Copy interp1d to the GPU
237*7f5b9731SStan Tomov   ierr = magma_malloc((void**)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0]));
238*7f5b9731SStan Tomov   CeedChk(ierr);
239*7f5b9731SStan Tomov   magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1);
240*7f5b9731SStan Tomov 
241*7f5b9731SStan Tomov   // Copy grad1d to the GPU
242*7f5b9731SStan Tomov   ierr = magma_malloc((void**)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0]));
243*7f5b9731SStan Tomov   CeedChk(ierr);
244*7f5b9731SStan Tomov   magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1);
245*7f5b9731SStan Tomov 
246*7f5b9731SStan Tomov   // Copy qweight1d to the GPU
247*7f5b9731SStan Tomov   ierr = magma_malloc((void**)&impl->dqweight1d, Q1d*sizeof(qweight1d[0]));
248*7f5b9731SStan Tomov   CeedChk(ierr);
249*7f5b9731SStan Tomov   magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1);
250*7f5b9731SStan Tomov 
251*7f5b9731SStan Tomov   return 0;
252*7f5b9731SStan Tomov }
253*7f5b9731SStan Tomov 
254*7f5b9731SStan Tomov #ifdef __cplusplus
255*7f5b9731SStan Tomov CEED_INTERN "C"
256*7f5b9731SStan Tomov #endif
257*7f5b9731SStan Tomov int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
258*7f5b9731SStan Tomov                                    CeedInt ndof, CeedInt nqpts,
259*7f5b9731SStan Tomov                                    const CeedScalar *interp,
260*7f5b9731SStan Tomov                                    const CeedScalar *grad,
261*7f5b9731SStan Tomov                                    const CeedScalar *qref,
262*7f5b9731SStan Tomov                                    const CeedScalar *qweight,
263*7f5b9731SStan Tomov                                    CeedBasis basis)
264*7f5b9731SStan Tomov {
265*7f5b9731SStan Tomov   int ierr;
266*7f5b9731SStan Tomov   Ceed ceed;
267*7f5b9731SStan Tomov   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
268*7f5b9731SStan Tomov 
269*7f5b9731SStan Tomov   return CeedError(ceed, 1, "Backend does not implement non-tensor bases");
270*7f5b9731SStan Tomov }
271