1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include "ceed-blocked.h" 18 #include "../ref/ceed-ref.h" 19 20 //------------------------------------------------------------------------------ 21 // Setup Input/Output Fields 22 //------------------------------------------------------------------------------ 23 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 24 CeedOperator op, bool inOrOut, 25 CeedElemRestriction *blkrestr, 26 CeedVector *fullevecs, CeedVector *evecs, 27 CeedVector *qvecs, CeedInt starte, 28 CeedInt numfields, CeedInt Q) { 29 CeedInt dim, ierr, ncomp, size, P; 30 Ceed ceed; 31 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 32 CeedBasis basis; 33 CeedElemRestriction r; 34 CeedOperatorField *opfields; 35 CeedQFunctionField *qffields; 36 if (inOrOut) { 37 ierr = CeedOperatorGetFields(op, NULL, &opfields); 38 CeedChk(ierr); 39 ierr = CeedQFunctionGetFields(qf, NULL, &qffields); 40 CeedChk(ierr); 41 } else { 42 ierr = CeedOperatorGetFields(op, &opfields, NULL); 43 CeedChk(ierr); 44 ierr = CeedQFunctionGetFields(qf, &qffields, NULL); 45 CeedChk(ierr); 46 } 47 const CeedInt blksize = 8; 48 49 // Loop over fields 50 for (CeedInt i=0; i<numfields; i++) { 51 CeedEvalMode emode; 52 ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr); 53 54 if (emode != CEED_EVAL_WEIGHT) { 55 ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r); 56 CeedChk(ierr); 57 CeedElemRestriction_Ref *data; 58 ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr); 59 Ceed ceed; 60 ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 61 CeedInt nelem, elemsize, nnodes; 62 CeedInterlaceMode imode; 63 ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 64 ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 65 ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr); 66 ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 67 if (data->indices) { 68 ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr); 69 ierr = CeedElemRestrictionCreateBlocked(ceed, imode, nelem, elemsize, 70 blksize, nnodes, ncomp, 71 CEED_MEM_HOST, CEED_COPY_VALUES, 72 data->indices, 73 &blkrestr[i+starte]); 74 CeedChk(ierr); 75 } else { 76 CeedInt strides[3]; 77 ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 78 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize, 79 blksize, nnodes, ncomp, strides, &blkrestr[i+starte]); 80 CeedChk(ierr); 81 } 82 ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL, 83 &fullevecs[i+starte]); 84 CeedChk(ierr); 85 } 86 87 switch(emode) { 88 case CEED_EVAL_NONE: 89 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 90 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 91 break; 92 case CEED_EVAL_INTERP: 93 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 94 ierr = CeedElemRestrictionGetElementSize(r, &P); 95 CeedChk(ierr); 96 ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr); 97 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 98 break; 99 case CEED_EVAL_GRAD: 100 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 101 ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr); 102 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 103 ierr = CeedElemRestrictionGetElementSize(r, &P); 104 CeedChk(ierr); 105 ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr); 106 ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr); 107 break; 108 case CEED_EVAL_WEIGHT: // Only on input fields 109 ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr); 110 ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr); 111 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 112 CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]); 113 CeedChk(ierr); 114 115 break; 116 case CEED_EVAL_DIV: 117 break; // Not implemented 118 case CEED_EVAL_CURL: 119 break; // Not implemented 120 } 121 } 122 return 0; 123 } 124 125 //------------------------------------------------------------------------------ 126 // Setup Operator 127 //------------------------------------------------------------------------------ 128 static int CeedOperatorSetup_Blocked(CeedOperator op) { 129 int ierr; 130 bool setupdone; 131 ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr); 132 if (setupdone) return 0; 133 Ceed ceed; 134 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 135 CeedOperator_Blocked *impl; 136 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 137 CeedQFunction qf; 138 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 139 CeedInt Q, numinputfields, numoutputfields; 140 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 141 ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr); 142 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 143 CeedChk(ierr); 144 CeedOperatorField *opinputfields, *opoutputfields; 145 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 146 CeedChk(ierr); 147 CeedQFunctionField *qfinputfields, *qfoutputfields; 148 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 149 CeedChk(ierr); 150 151 // Allocate 152 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr); 153 CeedChk(ierr); 154 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs); 155 CeedChk(ierr); 156 ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata); 157 CeedChk(ierr); 158 159 ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr); 160 ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr); 161 ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr); 162 ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr); 163 ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr); 164 165 impl->numein = numinputfields; impl->numeout = numoutputfields; 166 167 // Set up infield and outfield pointer arrays 168 // Infields 169 ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr, 170 impl->evecs, impl->evecsin, 171 impl->qvecsin, 0, 172 numinputfields, Q); 173 CeedChk(ierr); 174 // Outfields 175 ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr, 176 impl->evecs, impl->evecsout, 177 impl->qvecsout, numinputfields, 178 numoutputfields, Q); 179 CeedChk(ierr); 180 181 // Identity QFunctions 182 if (impl->identityqf) { 183 CeedEvalMode inmode, outmode; 184 CeedQFunctionField *infields, *outfields; 185 ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr); 186 187 for (CeedInt i=0; i<numinputfields; i++) { 188 ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode); 189 CeedChk(ierr); 190 ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode); 191 CeedChk(ierr); 192 193 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 194 impl->qvecsout[i] = impl->qvecsin[i]; 195 ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr); 196 } 197 } 198 199 ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr); 200 201 return 0; 202 } 203 204 //------------------------------------------------------------------------------ 205 // Setup Operator Inputs 206 //------------------------------------------------------------------------------ 207 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields, 208 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 209 CeedVector invec, bool skipactive, CeedOperator_Blocked *impl, 210 CeedRequest *request) { 211 CeedInt ierr; 212 CeedEvalMode emode; 213 CeedVector vec; 214 uint64_t state; 215 216 for (CeedInt i=0; i<numinputfields; i++) { 217 // Get input vector 218 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 219 if (vec == CEED_VECTOR_ACTIVE) { 220 if (skipactive) 221 continue; 222 else 223 vec = invec; 224 } 225 226 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 227 CeedChk(ierr); 228 if (emode == CEED_EVAL_WEIGHT) { // Skip 229 } else { 230 // Restrict 231 ierr = CeedVectorGetState(vec, &state); CeedChk(ierr); 232 if (state != impl->inputstate[i] || vec == invec) { 233 ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE, 234 vec, impl->evecs[i], request); 235 CeedChk(ierr); 236 impl->inputstate[i] = state; 237 } 238 // Get evec 239 ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST, 240 (const CeedScalar **) &impl->edata[i]); 241 CeedChk(ierr); 242 } 243 } 244 return 0; 245 } 246 247 //------------------------------------------------------------------------------ 248 // Input Basis Action 249 //------------------------------------------------------------------------------ 250 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 251 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 252 CeedInt numinputfields, CeedInt blksize, bool skipactive, 253 CeedOperator_Blocked *impl) { 254 CeedInt ierr; 255 CeedInt dim, elemsize, size; 256 CeedElemRestriction Erestrict; 257 CeedEvalMode emode; 258 CeedBasis basis; 259 260 for (CeedInt i=0; i<numinputfields; i++) { 261 // Skip active input 262 if (skipactive) { 263 CeedVector vec; 264 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 265 if (vec == CEED_VECTOR_ACTIVE) 266 continue; 267 } 268 269 // Get elemsize, emode, size 270 ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict); 271 CeedChk(ierr); 272 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 273 CeedChk(ierr); 274 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 275 CeedChk(ierr); 276 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 277 // Basis action 278 switch(emode) { 279 case CEED_EVAL_NONE: 280 ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST, 281 CEED_USE_POINTER, 282 &impl->edata[i][e*Q*size]); CeedChk(ierr); 283 break; 284 case CEED_EVAL_INTERP: 285 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 286 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 287 CEED_USE_POINTER, 288 &impl->edata[i][e*elemsize*size]); 289 CeedChk(ierr); 290 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 291 CEED_EVAL_INTERP, impl->evecsin[i], 292 impl->qvecsin[i]); CeedChk(ierr); 293 break; 294 case CEED_EVAL_GRAD: 295 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr); 296 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 297 ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST, 298 CEED_USE_POINTER, 299 &impl->edata[i][e*elemsize*size/dim]); 300 CeedChk(ierr); 301 ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE, 302 CEED_EVAL_GRAD, impl->evecsin[i], 303 impl->qvecsin[i]); CeedChk(ierr); 304 break; 305 case CEED_EVAL_WEIGHT: 306 break; // No action 307 case CEED_EVAL_DIV: 308 case CEED_EVAL_CURL: { 309 // LCOV_EXCL_START 310 ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); 311 CeedChk(ierr); 312 Ceed ceed; 313 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 314 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 315 // LCOV_EXCL_STOP 316 break; // Not implemented 317 } 318 } 319 } 320 return 0; 321 } 322 323 //------------------------------------------------------------------------------ 324 // Output Basis Action 325 //------------------------------------------------------------------------------ 326 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 327 CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields, 328 CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields, 329 CeedOperator op, CeedOperator_Blocked *impl) { 330 CeedInt ierr; 331 CeedInt dim, elemsize, size; 332 CeedElemRestriction Erestrict; 333 CeedEvalMode emode; 334 CeedBasis basis; 335 336 for (CeedInt i=0; i<numoutputfields; i++) { 337 // Get elemsize, emode, size 338 ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict); 339 CeedChk(ierr); 340 ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize); 341 CeedChk(ierr); 342 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 343 CeedChk(ierr); 344 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 345 // Basis action 346 switch(emode) { 347 case CEED_EVAL_NONE: 348 break; // No action 349 case CEED_EVAL_INTERP: 350 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 351 CeedChk(ierr); 352 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 353 CEED_USE_POINTER, 354 &impl->edata[i + numinputfields][e*elemsize*size]); 355 CeedChk(ierr); 356 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 357 CEED_EVAL_INTERP, impl->qvecsout[i], 358 impl->evecsout[i]); CeedChk(ierr); 359 break; 360 case CEED_EVAL_GRAD: 361 ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); 362 CeedChk(ierr); 363 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 364 ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST, 365 CEED_USE_POINTER, 366 &impl->edata[i + numinputfields][e*elemsize*size/dim]); 367 CeedChk(ierr); 368 ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE, 369 CEED_EVAL_GRAD, impl->qvecsout[i], 370 impl->evecsout[i]); CeedChk(ierr); 371 break; 372 case CEED_EVAL_WEIGHT: { 373 // LCOV_EXCL_START 374 Ceed ceed; 375 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 376 return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output " 377 "evaluation mode"); 378 // LCOV_EXCL_STOP 379 break; // Should not occur 380 } 381 case CEED_EVAL_DIV: 382 case CEED_EVAL_CURL: { 383 // LCOV_EXCL_START 384 Ceed ceed; 385 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 386 return CeedError(ceed, 1, "Ceed evaluation mode not implemented"); 387 // LCOV_EXCL_STOP 388 break; // Not implemented 389 } 390 } 391 } 392 return 0; 393 } 394 395 //------------------------------------------------------------------------------ 396 // Restore Input Vectors 397 //------------------------------------------------------------------------------ 398 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields, 399 CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields, 400 bool skipactive, CeedOperator_Blocked *impl) { 401 CeedInt ierr; 402 CeedEvalMode emode; 403 404 for (CeedInt i=0; i<numinputfields; i++) { 405 // Skip active inputs 406 if (skipactive) { 407 CeedVector vec; 408 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 409 if (vec == CEED_VECTOR_ACTIVE) 410 continue; 411 } 412 ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 413 CeedChk(ierr); 414 if (emode == CEED_EVAL_WEIGHT) { // Skip 415 } else { 416 ierr = CeedVectorRestoreArrayRead(impl->evecs[i], 417 (const CeedScalar **) &impl->edata[i]); 418 CeedChk(ierr); 419 } 420 } 421 return 0; 422 } 423 424 //------------------------------------------------------------------------------ 425 // Operator Apply 426 //------------------------------------------------------------------------------ 427 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec, 428 CeedVector outvec, 429 CeedRequest *request) { 430 int ierr; 431 CeedOperator_Blocked *impl; 432 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 433 const CeedInt blksize = 8; 434 CeedInt Q, numinputfields, numoutputfields, numelements, size; 435 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 436 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 437 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 438 CeedQFunction qf; 439 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 440 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 441 CeedChk(ierr); 442 CeedOperatorField *opinputfields, *opoutputfields; 443 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 444 CeedChk(ierr); 445 CeedQFunctionField *qfinputfields, *qfoutputfields; 446 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 447 CeedChk(ierr); 448 CeedEvalMode emode; 449 CeedVector vec; 450 451 // Setup 452 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 453 454 // Input Evecs and Restriction 455 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 456 opinputfields, invec, false, impl, 457 request); CeedChk(ierr); 458 459 // Output Evecs 460 for (CeedInt i=0; i<numoutputfields; i++) { 461 ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST, 462 &impl->edata[i + numinputfields]); CeedChk(ierr); 463 } 464 465 // Loop through elements 466 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 467 // Output pointers 468 for (CeedInt i=0; i<numoutputfields; i++) { 469 ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 470 CeedChk(ierr); 471 if (emode == CEED_EVAL_NONE) { 472 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); 473 CeedChk(ierr); 474 ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST, 475 CEED_USE_POINTER, 476 &impl->edata[i + numinputfields][e*Q*size]); 477 CeedChk(ierr); 478 } 479 } 480 481 // Input basis apply 482 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 483 numinputfields, blksize, false, impl); 484 CeedChk(ierr); 485 486 // Q function 487 if (!impl->identityqf) { 488 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 489 CeedChk(ierr); 490 } 491 492 // Output basis apply 493 ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields, 494 blksize, numinputfields, 495 numoutputfields, op, impl); 496 CeedChk(ierr); 497 } 498 499 // Output restriction 500 for (CeedInt i=0; i<numoutputfields; i++) { 501 // Restore evec 502 ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein], 503 &impl->edata[i + numinputfields]); CeedChk(ierr); 504 // Get output vector 505 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 506 // Active 507 if (vec == CEED_VECTOR_ACTIVE) 508 vec = outvec; 509 // Restrict 510 ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], 511 CEED_TRANSPOSE, impl->evecs[i+impl->numein], 512 vec, request); CeedChk(ierr); 513 514 } 515 516 // Restore input arrays 517 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 518 opinputfields, false, impl); CeedChk(ierr); 519 520 return 0; 521 } 522 523 //------------------------------------------------------------------------------ 524 // Assemble Linear QFunction 525 //------------------------------------------------------------------------------ 526 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op, 527 CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 528 int ierr; 529 CeedOperator_Blocked *impl; 530 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 531 const CeedInt blksize = 8; 532 CeedInt Q, numinputfields, numoutputfields, numelements, size; 533 ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr); 534 ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr); 535 CeedInt nblks = (numelements/blksize) + !!(numelements%blksize); 536 CeedQFunction qf; 537 ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); 538 ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 539 CeedChk(ierr); 540 CeedOperatorField *opinputfields, *opoutputfields; 541 ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 542 CeedChk(ierr); 543 CeedQFunctionField *qfinputfields, *qfoutputfields; 544 ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 545 CeedChk(ierr); 546 CeedVector vec, lvec; 547 CeedInt numactivein = 0, numactiveout = 0; 548 CeedVector *activein = NULL; 549 CeedScalar *a, *tmp; 550 Ceed ceed; 551 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 552 553 // Setup 554 ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr); 555 556 // Check for identity 557 if (impl->identityqf) 558 // LCOV_EXCL_START 559 return CeedError(ceed, 1, "Assembling identity qfunctions not supported"); 560 // LCOV_EXCL_STOP 561 562 // Input Evecs and Restriction 563 ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields, 564 opinputfields, NULL, true, impl, 565 request); CeedChk(ierr); 566 567 // Count number of active input fields 568 for (CeedInt i=0; i<numinputfields; i++) { 569 // Get input vector 570 ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr); 571 // Check if active input 572 if (vec == CEED_VECTOR_ACTIVE) { 573 ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr); 574 ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr); 575 ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp); 576 CeedChk(ierr); 577 ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr); 578 for (CeedInt field=0; field<size; field++) { 579 ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]); 580 CeedChk(ierr); 581 ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST, 582 CEED_USE_POINTER, &tmp[field*Q*blksize]); 583 CeedChk(ierr); 584 } 585 numactivein += size; 586 ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr); 587 } 588 } 589 590 // Count number of active output fields 591 for (CeedInt i=0; i<numoutputfields; i++) { 592 // Get output vector 593 ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr); 594 // Check if active output 595 if (vec == CEED_VECTOR_ACTIVE) { 596 ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr); 597 numactiveout += size; 598 } 599 } 600 601 // Check sizes 602 if (!numactivein || !numactiveout) 603 // LCOV_EXCL_START 604 return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs " 605 "and outputs"); 606 // LCOV_EXCL_STOP 607 608 // Setup lvec 609 ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout, 610 &lvec); CeedChk(ierr); 611 ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr); 612 613 // Create output restriction 614 CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q}; 615 ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q, numelements*Q, 616 numactivein*numactiveout, strides, rstr); CeedChk(ierr); 617 // Create assembled vector 618 ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout, 619 assembled); CeedChk(ierr); 620 621 // Loop through elements 622 for (CeedInt e=0; e<nblks*blksize; e+=blksize) { 623 // Input basis apply 624 ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields, 625 numinputfields, blksize, true, impl); 626 CeedChk(ierr); 627 628 // Assemble QFunction 629 for (CeedInt in=0; in<numactivein; in++) { 630 // Set Inputs 631 ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr); 632 if (numactivein > 1) { 633 ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein], 634 0.0); CeedChk(ierr); 635 } 636 // Set Outputs 637 for (CeedInt out=0; out<numoutputfields; out++) { 638 // Get output vector 639 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 640 CeedChk(ierr); 641 // Check if active output 642 if (vec == CEED_VECTOR_ACTIVE) { 643 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, 644 CEED_USE_POINTER, a); CeedChk(ierr); 645 ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size); 646 CeedChk(ierr); 647 a += size*Q*blksize; // Advance the pointer by the size of the output 648 } 649 } 650 // Apply QFunction 651 ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout); 652 CeedChk(ierr); 653 } 654 } 655 656 // Un-set output Qvecs to prevent accidental overwrite of Assembled 657 for (CeedInt out=0; out<numoutputfields; out++) { 658 // Get output vector 659 ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec); 660 CeedChk(ierr); 661 // Check if active output 662 if (vec == CEED_VECTOR_ACTIVE) { 663 CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES, 664 NULL); CeedChk(ierr); 665 } 666 } 667 668 // Restore input arrays 669 ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields, 670 opinputfields, true, impl); CeedChk(ierr); 671 672 // Output blocked restriction 673 ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr); 674 ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr); 675 CeedElemRestriction blkrstr; 676 ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize, 677 numelements*Q, 678 numactivein*numactiveout, 679 strides, &blkrstr); 680 CeedChk(ierr); 681 ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled, 682 request); CeedChk(ierr); 683 684 // Cleanup 685 for (CeedInt i=0; i<numactivein; i++) { 686 ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr); 687 } 688 ierr = CeedFree(&activein); CeedChk(ierr); 689 ierr = CeedVectorDestroy(&lvec); CeedChk(ierr); 690 ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr); 691 692 return 0; 693 } 694 695 //------------------------------------------------------------------------------ 696 // Operator Destroy 697 //------------------------------------------------------------------------------ 698 static int CeedOperatorDestroy_Blocked(CeedOperator op) { 699 int ierr; 700 CeedOperator_Blocked *impl; 701 ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr); 702 703 for (CeedInt i=0; i<impl->numein+impl->numeout; i++) { 704 ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr); 705 ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr); 706 } 707 ierr = CeedFree(&impl->blkrestr); CeedChk(ierr); 708 ierr = CeedFree(&impl->evecs); CeedChk(ierr); 709 ierr = CeedFree(&impl->edata); CeedChk(ierr); 710 ierr = CeedFree(&impl->inputstate); CeedChk(ierr); 711 712 for (CeedInt i=0; i<impl->numein; i++) { 713 ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr); 714 ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr); 715 } 716 ierr = CeedFree(&impl->evecsin); CeedChk(ierr); 717 ierr = CeedFree(&impl->qvecsin); CeedChk(ierr); 718 719 for (CeedInt i=0; i<impl->numeout; i++) { 720 ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr); 721 ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr); 722 } 723 ierr = CeedFree(&impl->evecsout); CeedChk(ierr); 724 ierr = CeedFree(&impl->qvecsout); CeedChk(ierr); 725 726 ierr = CeedFree(&impl); CeedChk(ierr); 727 return 0; 728 } 729 730 //------------------------------------------------------------------------------ 731 // Operator Create 732 //------------------------------------------------------------------------------ 733 int CeedOperatorCreate_Blocked(CeedOperator op) { 734 int ierr; 735 Ceed ceed; 736 ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); 737 CeedOperator_Blocked *impl; 738 739 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 740 ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr); 741 742 ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction", 743 CeedOperatorAssembleLinearQFunction_Blocked); 744 CeedChk(ierr); 745 ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 746 CeedOperatorApply_Blocked); CeedChk(ierr); 747 ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 748 CeedOperatorDestroy_Blocked); CeedChk(ierr); 749 return 0; 750 } 751 //------------------------------------------------------------------------------ 752