xref: /libCEED/rust/libceed-sys/c-src/backends/blocked/ceed-blocked-operator.c (revision 7509a596beda7c1d002d2274a17375b09541fdb6)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 #include "../ref/ceed-ref.h"
19 
20 //------------------------------------------------------------------------------
21 // Setup Input/Output Fields
22 //------------------------------------------------------------------------------
23 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
24     CeedOperator op, bool inOrOut,
25     CeedElemRestriction *blkrestr,
26     CeedVector *fullevecs, CeedVector *evecs,
27     CeedVector *qvecs, CeedInt starte,
28     CeedInt numfields, CeedInt Q) {
29   CeedInt dim, ierr, ncomp, size, P;
30   Ceed ceed;
31   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
32   CeedBasis basis;
33   CeedElemRestriction r;
34   CeedOperatorField *opfields;
35   CeedQFunctionField *qffields;
36   if (inOrOut) {
37     ierr = CeedOperatorGetFields(op, NULL, &opfields);
38     CeedChk(ierr);
39     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
40     CeedChk(ierr);
41   } else {
42     ierr = CeedOperatorGetFields(op, &opfields, NULL);
43     CeedChk(ierr);
44     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
45     CeedChk(ierr);
46   }
47   const CeedInt blksize = 8;
48 
49   // Loop over fields
50   for (CeedInt i=0; i<numfields; i++) {
51     CeedEvalMode emode;
52     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
53 
54     if (emode != CEED_EVAL_WEIGHT) {
55       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
56       CeedChk(ierr);
57       CeedElemRestriction_Ref *data;
58       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
59       Ceed ceed;
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
61       CeedInt nelem, elemsize, nnodes;
62       CeedInterlaceMode imode;
63       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
64       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
65       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
66       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
67       if (data->indices) {
68         ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr);
69         ierr = CeedElemRestrictionCreateBlocked(ceed, imode, nelem, elemsize,
70                                                 blksize, nnodes, ncomp,
71                                                 CEED_MEM_HOST, CEED_COPY_VALUES,
72                                                 data->indices,
73                                                 &blkrestr[i+starte]);
74         CeedChk(ierr);
75       } else {
76         CeedInt strides[3];
77         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
78         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize,
79                blksize, nnodes, ncomp, strides, &blkrestr[i+starte]);
80         CeedChk(ierr);
81       }
82       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
83                                              &fullevecs[i+starte]);
84       CeedChk(ierr);
85     }
86 
87     switch(emode) {
88     case CEED_EVAL_NONE:
89       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
90       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
91       break;
92     case CEED_EVAL_INTERP:
93       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
94       ierr = CeedElemRestrictionGetElementSize(r, &P);
95       CeedChk(ierr);
96       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
97       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
98       break;
99     case CEED_EVAL_GRAD:
100       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
101       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
102       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
103       ierr = CeedElemRestrictionGetElementSize(r, &P);
104       CeedChk(ierr);
105       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
106       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
107       break;
108     case CEED_EVAL_WEIGHT: // Only on input fields
109       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
110       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
111       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
112                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
113       CeedChk(ierr);
114 
115       break;
116     case CEED_EVAL_DIV:
117       break; // Not implemented
118     case CEED_EVAL_CURL:
119       break; // Not implemented
120     }
121   }
122   return 0;
123 }
124 
125 //------------------------------------------------------------------------------
126 // Setup Operator
127 //------------------------------------------------------------------------------
128 static int CeedOperatorSetup_Blocked(CeedOperator op) {
129   int ierr;
130   bool setupdone;
131   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
132   if (setupdone) return 0;
133   Ceed ceed;
134   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
135   CeedOperator_Blocked *impl;
136   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
137   CeedQFunction qf;
138   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
139   CeedInt Q, numinputfields, numoutputfields;
140   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
141   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
142   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
143   CeedChk(ierr);
144   CeedOperatorField *opinputfields, *opoutputfields;
145   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
146   CeedChk(ierr);
147   CeedQFunctionField *qfinputfields, *qfoutputfields;
148   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
149   CeedChk(ierr);
150 
151   // Allocate
152   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
153   CeedChk(ierr);
154   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
155   CeedChk(ierr);
156   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
157   CeedChk(ierr);
158 
159   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
160   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
161   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
162   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
163   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
164 
165   impl->numein = numinputfields; impl->numeout = numoutputfields;
166 
167   // Set up infield and outfield pointer arrays
168   // Infields
169   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
170                                          impl->evecs, impl->evecsin,
171                                          impl->qvecsin, 0,
172                                          numinputfields, Q);
173   CeedChk(ierr);
174   // Outfields
175   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
176                                          impl->evecs, impl->evecsout,
177                                          impl->qvecsout, numinputfields,
178                                          numoutputfields, Q);
179   CeedChk(ierr);
180 
181   // Identity QFunctions
182   if (impl->identityqf) {
183     CeedEvalMode inmode, outmode;
184     CeedQFunctionField *infields, *outfields;
185     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
186 
187     for (CeedInt i=0; i<numinputfields; i++) {
188       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
189       CeedChk(ierr);
190       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
191       CeedChk(ierr);
192 
193       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
194       impl->qvecsout[i] = impl->qvecsin[i];
195       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
196     }
197   }
198 
199   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
200 
201   return 0;
202 }
203 
204 //------------------------------------------------------------------------------
205 // Setup Operator Inputs
206 //------------------------------------------------------------------------------
207 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
208     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
209     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
210     CeedRequest *request) {
211   CeedInt ierr;
212   CeedEvalMode emode;
213   CeedVector vec;
214   uint64_t state;
215 
216   for (CeedInt i=0; i<numinputfields; i++) {
217     // Get input vector
218     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
219     if (vec == CEED_VECTOR_ACTIVE) {
220       if (skipactive)
221         continue;
222       else
223         vec = invec;
224     }
225 
226     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
227     CeedChk(ierr);
228     if (emode == CEED_EVAL_WEIGHT) { // Skip
229     } else {
230       // Restrict
231       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
232       if (state != impl->inputstate[i] || vec == invec) {
233         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
234                                         vec, impl->evecs[i], request);
235         CeedChk(ierr);
236         impl->inputstate[i] = state;
237       }
238       // Get evec
239       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
240                                     (const CeedScalar **) &impl->edata[i]);
241       CeedChk(ierr);
242     }
243   }
244   return 0;
245 }
246 
247 //------------------------------------------------------------------------------
248 // Input Basis Action
249 //------------------------------------------------------------------------------
250 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
251     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
252     CeedInt numinputfields, CeedInt blksize, bool skipactive,
253     CeedOperator_Blocked *impl) {
254   CeedInt ierr;
255   CeedInt dim, elemsize, size;
256   CeedElemRestriction Erestrict;
257   CeedEvalMode emode;
258   CeedBasis basis;
259 
260   for (CeedInt i=0; i<numinputfields; i++) {
261     // Skip active input
262     if (skipactive) {
263       CeedVector vec;
264       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
265       if (vec == CEED_VECTOR_ACTIVE)
266         continue;
267     }
268 
269     // Get elemsize, emode, size
270     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
271     CeedChk(ierr);
272     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
273     CeedChk(ierr);
274     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
275     CeedChk(ierr);
276     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
277     // Basis action
278     switch(emode) {
279     case CEED_EVAL_NONE:
280       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
281                                 CEED_USE_POINTER,
282                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
283       break;
284     case CEED_EVAL_INTERP:
285       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
286       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
287                                 CEED_USE_POINTER,
288                                 &impl->edata[i][e*elemsize*size]);
289       CeedChk(ierr);
290       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
291                             CEED_EVAL_INTERP, impl->evecsin[i],
292                             impl->qvecsin[i]); CeedChk(ierr);
293       break;
294     case CEED_EVAL_GRAD:
295       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
296       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
297       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
298                                 CEED_USE_POINTER,
299                                 &impl->edata[i][e*elemsize*size/dim]);
300       CeedChk(ierr);
301       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
302                             CEED_EVAL_GRAD, impl->evecsin[i],
303                             impl->qvecsin[i]); CeedChk(ierr);
304       break;
305     case CEED_EVAL_WEIGHT:
306       break;  // No action
307     case CEED_EVAL_DIV:
308     case CEED_EVAL_CURL: {
309       // LCOV_EXCL_START
310       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
311       CeedChk(ierr);
312       Ceed ceed;
313       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
314       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
315       // LCOV_EXCL_STOP
316       break; // Not implemented
317     }
318     }
319   }
320   return 0;
321 }
322 
323 //------------------------------------------------------------------------------
324 // Output Basis Action
325 //------------------------------------------------------------------------------
326 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
327     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
328     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
329     CeedOperator op, CeedOperator_Blocked *impl) {
330   CeedInt ierr;
331   CeedInt dim, elemsize, size;
332   CeedElemRestriction Erestrict;
333   CeedEvalMode emode;
334   CeedBasis basis;
335 
336   for (CeedInt i=0; i<numoutputfields; i++) {
337     // Get elemsize, emode, size
338     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
339     CeedChk(ierr);
340     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
341     CeedChk(ierr);
342     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
343     CeedChk(ierr);
344     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
345     // Basis action
346     switch(emode) {
347     case CEED_EVAL_NONE:
348       break; // No action
349     case CEED_EVAL_INTERP:
350       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
351       CeedChk(ierr);
352       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
353                                 CEED_USE_POINTER,
354                                 &impl->edata[i + numinputfields][e*elemsize*size]);
355       CeedChk(ierr);
356       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
357                             CEED_EVAL_INTERP, impl->qvecsout[i],
358                             impl->evecsout[i]); CeedChk(ierr);
359       break;
360     case CEED_EVAL_GRAD:
361       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
362       CeedChk(ierr);
363       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
364       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
365                                 CEED_USE_POINTER,
366                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
367       CeedChk(ierr);
368       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
369                             CEED_EVAL_GRAD, impl->qvecsout[i],
370                             impl->evecsout[i]); CeedChk(ierr);
371       break;
372     case CEED_EVAL_WEIGHT: {
373       // LCOV_EXCL_START
374       Ceed ceed;
375       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
376       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
377                        "evaluation mode");
378       // LCOV_EXCL_STOP
379       break; // Should not occur
380     }
381     case CEED_EVAL_DIV:
382     case CEED_EVAL_CURL: {
383       // LCOV_EXCL_START
384       Ceed ceed;
385       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
386       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
387       // LCOV_EXCL_STOP
388       break; // Not implemented
389     }
390     }
391   }
392   return 0;
393 }
394 
395 //------------------------------------------------------------------------------
396 // Restore Input Vectors
397 //------------------------------------------------------------------------------
398 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
399     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
400     bool skipactive, CeedOperator_Blocked *impl) {
401   CeedInt ierr;
402   CeedEvalMode emode;
403 
404   for (CeedInt i=0; i<numinputfields; i++) {
405     // Skip active inputs
406     if (skipactive) {
407       CeedVector vec;
408       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
409       if (vec == CEED_VECTOR_ACTIVE)
410         continue;
411     }
412     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
413     CeedChk(ierr);
414     if (emode == CEED_EVAL_WEIGHT) { // Skip
415     } else {
416       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
417                                         (const CeedScalar **) &impl->edata[i]);
418       CeedChk(ierr);
419     }
420   }
421   return 0;
422 }
423 
424 //------------------------------------------------------------------------------
425 // Operator Apply
426 //------------------------------------------------------------------------------
427 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
428                                      CeedVector outvec,
429                                      CeedRequest *request) {
430   int ierr;
431   CeedOperator_Blocked *impl;
432   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
433   const CeedInt blksize = 8;
434   CeedInt Q, numinputfields, numoutputfields, numelements, size;
435   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
436   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
437   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
438   CeedQFunction qf;
439   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
440   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
441   CeedChk(ierr);
442   CeedOperatorField *opinputfields, *opoutputfields;
443   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
444   CeedChk(ierr);
445   CeedQFunctionField *qfinputfields, *qfoutputfields;
446   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
447   CeedChk(ierr);
448   CeedEvalMode emode;
449   CeedVector vec;
450 
451   // Setup
452   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
453 
454   // Input Evecs and Restriction
455   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
456                                          opinputfields, invec, false, impl,
457                                          request); CeedChk(ierr);
458 
459   // Output Evecs
460   for (CeedInt i=0; i<numoutputfields; i++) {
461     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
462                               &impl->edata[i + numinputfields]); CeedChk(ierr);
463   }
464 
465   // Loop through elements
466   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
467     // Output pointers
468     for (CeedInt i=0; i<numoutputfields; i++) {
469       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
470       CeedChk(ierr);
471       if (emode == CEED_EVAL_NONE) {
472         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
473         CeedChk(ierr);
474         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
475                                   CEED_USE_POINTER,
476                                   &impl->edata[i + numinputfields][e*Q*size]);
477         CeedChk(ierr);
478       }
479     }
480 
481     // Input basis apply
482     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
483                                           numinputfields, blksize, false, impl);
484     CeedChk(ierr);
485 
486     // Q function
487     if (!impl->identityqf) {
488       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
489       CeedChk(ierr);
490     }
491 
492     // Output basis apply
493     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
494                                            blksize, numinputfields,
495                                            numoutputfields, op, impl);
496     CeedChk(ierr);
497   }
498 
499   // Output restriction
500   for (CeedInt i=0; i<numoutputfields; i++) {
501     // Restore evec
502     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
503                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
504     // Get output vector
505     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
506     // Active
507     if (vec == CEED_VECTOR_ACTIVE)
508       vec = outvec;
509     // Restrict
510     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein],
511                                     CEED_TRANSPOSE, impl->evecs[i+impl->numein],
512                                     vec, request); CeedChk(ierr);
513 
514   }
515 
516   // Restore input arrays
517   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
518          opinputfields, false, impl); CeedChk(ierr);
519 
520   return 0;
521 }
522 
523 //------------------------------------------------------------------------------
524 // Assemble Linear QFunction
525 //------------------------------------------------------------------------------
526 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op,
527     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
528   int ierr;
529   CeedOperator_Blocked *impl;
530   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
531   const CeedInt blksize = 8;
532   CeedInt Q, numinputfields, numoutputfields, numelements, size;
533   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
534   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
535   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
536   CeedQFunction qf;
537   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
538   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
539   CeedChk(ierr);
540   CeedOperatorField *opinputfields, *opoutputfields;
541   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
542   CeedChk(ierr);
543   CeedQFunctionField *qfinputfields, *qfoutputfields;
544   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
545   CeedChk(ierr);
546   CeedVector vec, lvec;
547   CeedInt numactivein = 0, numactiveout = 0;
548   CeedVector *activein = NULL;
549   CeedScalar *a, *tmp;
550   Ceed ceed;
551   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
552 
553   // Setup
554   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
555 
556   // Check for identity
557   if (impl->identityqf)
558     // LCOV_EXCL_START
559     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
560   // LCOV_EXCL_STOP
561 
562   // Input Evecs and Restriction
563   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
564                                          opinputfields, NULL, true, impl,
565                                          request); CeedChk(ierr);
566 
567   // Count number of active input fields
568   for (CeedInt i=0; i<numinputfields; i++) {
569     // Get input vector
570     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
571     // Check if active input
572     if (vec == CEED_VECTOR_ACTIVE) {
573       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
574       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
575       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
576       CeedChk(ierr);
577       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
578       for (CeedInt field=0; field<size; field++) {
579         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
580         CeedChk(ierr);
581         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
582                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
583         CeedChk(ierr);
584       }
585       numactivein += size;
586       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
587     }
588   }
589 
590   // Count number of active output fields
591   for (CeedInt i=0; i<numoutputfields; i++) {
592     // Get output vector
593     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
594     // Check if active output
595     if (vec == CEED_VECTOR_ACTIVE) {
596       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
597       numactiveout += size;
598     }
599   }
600 
601   // Check sizes
602   if (!numactivein || !numactiveout)
603     // LCOV_EXCL_START
604     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
605                      "and outputs");
606   // LCOV_EXCL_STOP
607 
608   // Setup lvec
609   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
610                           &lvec); CeedChk(ierr);
611   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
612 
613   // Create output restriction
614   CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q};
615   ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q, numelements*Q,
616                                           numactivein*numactiveout, strides, rstr); CeedChk(ierr);
617   // Create assembled vector
618   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
619                           assembled); CeedChk(ierr);
620 
621   // Loop through elements
622   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
623     // Input basis apply
624     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
625                                           numinputfields, blksize, true, impl);
626     CeedChk(ierr);
627 
628     // Assemble QFunction
629     for (CeedInt in=0; in<numactivein; in++) {
630       // Set Inputs
631       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
632       if (numactivein > 1) {
633         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
634                                   0.0); CeedChk(ierr);
635       }
636       // Set Outputs
637       for (CeedInt out=0; out<numoutputfields; out++) {
638         // Get output vector
639         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
640         CeedChk(ierr);
641         // Check if active output
642         if (vec == CEED_VECTOR_ACTIVE) {
643           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
644                              CEED_USE_POINTER, a); CeedChk(ierr);
645           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
646           CeedChk(ierr);
647           a += size*Q*blksize; // Advance the pointer by the size of the output
648         }
649       }
650       // Apply QFunction
651       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
652       CeedChk(ierr);
653     }
654   }
655 
656   // Un-set output Qvecs to prevent accidental overwrite of Assembled
657   for (CeedInt out=0; out<numoutputfields; out++) {
658     // Get output vector
659     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
660     CeedChk(ierr);
661     // Check if active output
662     if (vec == CEED_VECTOR_ACTIVE) {
663       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
664                          NULL); CeedChk(ierr);
665     }
666   }
667 
668   // Restore input arrays
669   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
670          opinputfields, true, impl); CeedChk(ierr);
671 
672   // Output blocked restriction
673   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
674   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
675   CeedElemRestriction blkrstr;
676   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize,
677          numelements*Q,
678          numactivein*numactiveout,
679          strides, &blkrstr);
680   CeedChk(ierr);
681   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled,
682                                   request); CeedChk(ierr);
683 
684   // Cleanup
685   for (CeedInt i=0; i<numactivein; i++) {
686     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
687   }
688   ierr = CeedFree(&activein); CeedChk(ierr);
689   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
690   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
691 
692   return 0;
693 }
694 
695 //------------------------------------------------------------------------------
696 // Operator Destroy
697 //------------------------------------------------------------------------------
698 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
699   int ierr;
700   CeedOperator_Blocked *impl;
701   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
702 
703   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
704     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
705     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
706   }
707   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
708   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
709   ierr = CeedFree(&impl->edata); CeedChk(ierr);
710   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
711 
712   for (CeedInt i=0; i<impl->numein; i++) {
713     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
714     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
715   }
716   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
717   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
718 
719   for (CeedInt i=0; i<impl->numeout; i++) {
720     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
721     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
722   }
723   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
724   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
725 
726   ierr = CeedFree(&impl); CeedChk(ierr);
727   return 0;
728 }
729 
730 //------------------------------------------------------------------------------
731 // Operator Create
732 //------------------------------------------------------------------------------
733 int CeedOperatorCreate_Blocked(CeedOperator op) {
734   int ierr;
735   Ceed ceed;
736   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
737   CeedOperator_Blocked *impl;
738 
739   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
740   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
741 
742   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
743                                 CeedOperatorAssembleLinearQFunction_Blocked);
744   CeedChk(ierr);
745   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
746                                 CeedOperatorApply_Blocked); CeedChk(ierr);
747   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
748                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
749   return 0;
750 }
751 //------------------------------------------------------------------------------
752