1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed-impl.h> 18 #include <string.h> 19 #include "magma.h" 20 21 typedef struct { 22 CeedScalar *array; 23 CeedScalar *darray; 24 int own_; 25 } CeedVector_Magma; 26 27 typedef struct { 28 CeedInt *indices; 29 CeedInt *dindices; 30 int own_; 31 } CeedElemRestriction_Magma; 32 33 typedef struct { 34 CeedVector etmp; 35 CeedVector qdata; 36 } CeedOperator_Magma; 37 38 // ***************************************************************************** 39 // * Initialize vector vec (after free mem) with values from array based on cmode 40 // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 41 // * to array, and data is copied (not store passed pointer) 42 // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 43 // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 44 // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 45 // ***************************************************************************** 46 static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 47 CeedCopyMode cmode, CeedScalar *array) { 48 CeedVector_Magma *impl = vec->data; 49 int ierr; 50 51 // If own data, free that "old" data, e.g., as it may be of different size 52 if (impl->own_){ 53 magma_free( impl->darray ); 54 magma_free_pinned( impl->array ); 55 impl->darray = NULL; 56 impl->array = NULL; 57 impl->own_ = 0; 58 } 59 60 if (mtype == CEED_MEM_HOST) { 61 // memory is on the host; own_ = 0 62 switch (cmode) { 63 case CEED_COPY_VALUES: 64 ierr = magma_malloc( (void**)&impl->darray, 65 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 66 ierr = magma_malloc_pinned( (void**)&impl->array, 67 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 68 impl->own_ = 1; 69 70 if (array) 71 magma_setvector(vec->length, sizeof(array[0]), 72 array, 1, impl->darray, 1); 73 break; 74 case CEED_OWN_POINTER: 75 ierr = magma_malloc( (void**)&impl->darray, 76 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 77 // TODO: possible problem here is if we are passed non-pinned memory; 78 // (as we own it, lter in destroy, we use free for pinned memory). 79 impl->array = array; 80 impl->own_ = 1; 81 82 if (array) 83 magma_setvector(vec->length, sizeof(array[0]), 84 array, 1, impl->darray, 1); 85 break; 86 case CEED_USE_POINTER: 87 impl->darray = NULL; 88 impl->array = array; 89 } 90 } else if (mtype == CEED_MEM_DEVICE) { 91 // memory is on the device; own = 0 92 switch (cmode) { 93 case CEED_COPY_VALUES: 94 ierr = magma_malloc( (void**)&impl->darray, 95 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 96 ierr = magma_malloc_pinned( (void**)&impl->array, 97 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 98 impl->own_ = 1; 99 100 if (array) 101 magma_copyvector(vec->length, sizeof(array[0]), 102 array, 1, impl->darray, 1); 103 break; 104 case CEED_OWN_POINTER: 105 impl->darray = array; 106 ierr = magma_malloc_pinned( (void**)&impl->array, 107 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 108 impl->own_ = 1; 109 110 break; 111 case CEED_USE_POINTER: 112 impl->darray = array; 113 impl->array = NULL; 114 } 115 116 } else 117 return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 118 119 return 0; 120 } 121 122 // ***************************************************************************** 123 // * Give data pointer from vector vec to array (on HOST or DEVICE) 124 // ***************************************************************************** 125 static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 126 CeedScalar **array) { 127 CeedVector_Magma *impl = vec->data; 128 int ierr; 129 130 if (mtype == CEED_MEM_HOST) { 131 if (!impl->array){ 132 // Allocate data if array is NULL 133 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 134 CeedChk(ierr); 135 } else if (impl->own_) { 136 // data is owned so GPU had the most up-to-date version; copy it 137 magma_getvector(vec->length, sizeof(*array[0]), 138 impl->darray, 1, impl->array, 1); 139 } 140 *array = impl->array; 141 } else if (mtype == CEED_MEM_DEVICE) { 142 if (!impl->darray){ 143 // Allocate data if darray is NULL 144 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 145 CeedChk(ierr); 146 } 147 *array = impl->darray; 148 } else 149 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 150 151 return 0; 152 } 153 154 // ***************************************************************************** 155 // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 156 // ***************************************************************************** 157 static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 158 const CeedScalar **array) { 159 CeedVector_Magma *impl = vec->data; 160 int ierr; 161 162 if (mtype == CEED_MEM_HOST) { 163 if (!impl->array){ 164 // Allocate data if array is NULL 165 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 166 CeedChk(ierr); 167 } else if (impl->own_) { 168 // data is owned so GPU had the most up-to-date version; copy it 169 magma_getvector(vec->length, sizeof(*array[0]), 170 impl->darray, 1, impl->array, 1); 171 } 172 *array = impl->array; 173 } else if (mtype == CEED_MEM_DEVICE) { 174 if (!impl->darray){ 175 // Allocate data if darray is NULL 176 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 177 CeedChk(ierr); 178 } 179 *array = impl->darray; 180 } else 181 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 182 183 return 0; 184 } 185 186 // ***************************************************************************** 187 // * There is no mtype here for array so it is not clear if we restore from HOST 188 // * memory or from DEVICE memory. We assume that it is CPU memory because if 189 // * it was GPU memory we would not call this routine at all. 190 // * Restore vector vec with values from array, where array received its values 191 // * from vec and possibly modified them. 192 // ***************************************************************************** 193 static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 194 CeedVector_Magma *impl = vec->data; 195 196 if (impl->darray!=NULL) 197 magma_setvector(vec->length, sizeof(*array[0]), 198 *array, 1, impl->darray, 1); 199 200 *array = NULL; 201 return 0; 202 } 203 204 // ***************************************************************************** 205 // * There is no mtype here for array so it is not clear if we restore from HOST 206 // * memory or from DEVICE memory. We assume that it is CPU memory because if 207 // * it was GPU memory we would not call this routine at all. 208 // * Restore vector vec with values from array, where array received its values 209 // * from vec to only read them; in this case vec may have been modified meanwhile 210 // * and needs to be restored here. 211 // ***************************************************************************** 212 static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 213 const CeedScalar **array) { 214 CeedVector_Magma *impl = vec->data; 215 216 if (impl->darray!=NULL) 217 magma_setvector(vec->length, sizeof(*array[0]), 218 *array, 1, impl->darray, 1); 219 220 *array = NULL; 221 return 0; 222 } 223 224 static int CeedVectorDestroy_Magma(CeedVector vec) { 225 CeedVector_Magma *impl = vec->data; 226 int ierr; 227 228 // Free if we own the data 229 if (impl->own_){ 230 ierr = magma_free_pinned(impl->array); CeedChk(ierr); 231 ierr = magma_free(impl->darray); CeedChk(ierr); 232 } 233 ierr = CeedFree(&vec->data); CeedChk(ierr); 234 return 0; 235 } 236 237 // ***************************************************************************** 238 // * Create vector vec of size n 239 // ***************************************************************************** 240 static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) { 241 CeedVector_Magma *impl; 242 int ierr; 243 244 vec->SetArray = CeedVectorSetArray_Magma; 245 vec->GetArray = CeedVectorGetArray_Magma; 246 vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 247 vec->RestoreArray = CeedVectorRestoreArray_Magma; 248 vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 249 vec->Destroy = CeedVectorDestroy_Magma; 250 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 251 impl->darray = NULL; 252 impl->array = NULL; 253 impl->own_ = 0; 254 vec->data = impl; 255 return 0; 256 } 257 258 // ***************************************************************************** 259 // * Apply restriction operator r to u: v = r(rmode) u 260 // ***************************************************************************** 261 static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 262 CeedTransposeMode tmode, CeedInt ncomp, 263 CeedTransposeMode lmode, CeedVector u, 264 CeedVector v, CeedRequest *request) { 265 CeedElemRestriction_Magma *impl = r->data; 266 int ierr; 267 const CeedScalar *uu; 268 CeedScalar *vv; 269 CeedInt esize = r->nelem*r->elemsize; 270 //printf("HELLOOOOOOOOO=======================\n"); 271 272 ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 273 ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 274 if (tmode == CEED_NOTRANSPOSE) { 275 // Perform: v = r * u 276 if (ncomp == 1) { 277 for (CeedInt i=0; i<esize; i++) vv[i] = uu[impl->indices[i]]; 278 279 /* 280 // Works - in t05 x has to be with CEED_COPY_VALUES 281 CeedVector_Magma *uimpl = u->data, *vimpl = v->data; 282 uu = uimpl->darray; 283 vv = vimpl->darray; 284 CeedInt *indices;// = (int *)impl->indices; 285 magma_malloc( (void**)&indices, esize * sizeof(CeedInt)); 286 magma_setvector(esize, sizeof(CeedInt), 287 (int *)impl->indices, 1, indices, 1); 288 magma_template<<i=0:esize>>(const CeedScalar *uu, CeedScalar *vv, CeedInt *indices) 289 { 290 vv[i] = uu[indices[i]]; 291 } 292 293 // printf("HELLOOOOOOOOO....... size = %d\n", esize); 294 // 295 296 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 297 *request = NULL; 298 return 0; 299 */ 300 301 } else { 302 //printf("HELLOOOOOOOOO-------------\n"); 303 // vv is (elemsize x ncomp x nelem), column-major 304 if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 305 for (CeedInt e = 0; e < r->nelem; e++) 306 for (CeedInt d = 0; d < ncomp; d++) 307 for (CeedInt i=0; i<r->elemsize; i++) { 308 vv[i+r->elemsize*(d+ncomp*e)] = 309 uu[impl->indices[i+r->elemsize*e]+r->ndof*d]; 310 } 311 } else { // u is (ncomp x ndof), column-major 312 for (CeedInt e = 0; e < r->nelem; e++) 313 for (CeedInt d = 0; d < ncomp; d++) 314 for (CeedInt i=0; i<r->elemsize; i++) { 315 vv[i+r->elemsize*(d+ncomp*e)] = 316 uu[d+ncomp*impl->indices[i+r->elemsize*e]]; 317 } 318 } 319 } 320 } else { 321 // Note: in transpose mode, we perform: v += r^t * u 322 if (ncomp == 1) { 323 for (CeedInt i=0; i<esize; i++) vv[impl->indices[i]] += uu[i]; 324 } else { 325 //printf("HELLOOOOOOOOO+++++++++++++\n"); 326 // u is (elemsize x ncomp x nelem) 327 if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 328 for (CeedInt e = 0; e < r->nelem; e++) 329 for (CeedInt d = 0; d < ncomp; d++) 330 for (CeedInt i=0; i<r->elemsize; i++) { 331 vv[impl->indices[i+r->elemsize*e]+r->ndof*d] += 332 uu[i+r->elemsize*(d+e*ncomp)]; 333 } 334 } else { // vv is (ncomp x ndof), column-major 335 for (CeedInt e = 0; e < r->nelem; e++) 336 for (CeedInt d = 0; d < ncomp; d++) 337 for (CeedInt i=0; i<r->elemsize; i++) { 338 vv[d+ncomp*impl->indices[i+r->elemsize*e]] += 339 uu[i+r->elemsize*(d+e*ncomp)]; 340 } 341 } 342 } 343 } 344 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 345 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 346 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 347 *request = NULL; 348 return 0; 349 } 350 351 static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 352 CeedElemRestriction_Magma *impl = r->data; 353 int ierr; 354 355 // Free if we own the data 356 if (impl->own_){ 357 ierr = magma_free_pinned(impl->indices); CeedChk(ierr); 358 ierr = magma_free(impl->dindices); CeedChk(ierr); 359 } 360 ierr = CeedFree(&r->data); CeedChk(ierr); 361 return 0; 362 } 363 364 static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r, 365 CeedMemType mtype, 366 CeedCopyMode cmode, 367 const CeedInt *indices) { 368 int ierr, size = r->nelem*r->elemsize; 369 CeedElemRestriction_Magma *impl; 370 371 // Allocate memory for the MAGMA Restricton and initializa pointers to NULL 372 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 373 impl->dindices = NULL; 374 impl->indices = NULL; 375 impl->own_ = 0; 376 377 if (mtype == CEED_MEM_HOST) { 378 // memory is on the host; own_ = 0 379 switch (cmode) { 380 case CEED_COPY_VALUES: 381 ierr = magma_malloc( (void**)&impl->dindices, 382 size * sizeof(CeedInt)); CeedChk(ierr); 383 ierr = magma_malloc_pinned( (void**)&impl->indices, 384 size * sizeof(CeedInt)); CeedChk(ierr); 385 impl->own_ = 1; 386 387 if (indices) 388 magma_setvector(size, sizeof(CeedInt), 389 indices, 1, impl->dindices, 1); 390 break; 391 case CEED_OWN_POINTER: 392 ierr = magma_malloc( (void**)&impl->dindices, 393 size * sizeof(CeedInt)); CeedChk(ierr); 394 // TODO: possible problem here is if we are passed non-pinned memory; 395 // (as we own it, lter in destroy, we use free for pinned memory). 396 impl->indices = indices; 397 impl->own_ = 1; 398 399 if (indices) 400 magma_setvector(size, sizeof(CeedInt), 401 indices, 1, impl->dindices, 1); 402 break; 403 case CEED_USE_POINTER: 404 impl->dindices = NULL; 405 impl->indices = indices; 406 } 407 } else if (mtype == CEED_MEM_DEVICE) { 408 // memory is on the device; own = 0 409 switch (cmode) { 410 case CEED_COPY_VALUES: 411 ierr = magma_malloc( (void**)&impl->dindices, 412 size * sizeof(CeedInt)); CeedChk(ierr); 413 ierr = magma_malloc_pinned( (void**)&impl->indices, 414 size * sizeof(CeedInt)); CeedChk(ierr); 415 impl->own_ = 1; 416 417 if (indices) 418 magma_copyvector(size, sizeof(CeedInt), 419 indices, 1, impl->dindices, 1); 420 break; 421 case CEED_OWN_POINTER: 422 impl->dindices = indices; 423 ierr = magma_malloc_pinned( (void**)&impl->indices, 424 size * sizeof(CeedInt)); CeedChk(ierr); 425 impl->own_ = 1; 426 427 break; 428 case CEED_USE_POINTER: 429 impl->dindices = indices; 430 impl->indices = NULL; 431 } 432 433 } else 434 return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported"); 435 436 r->data = impl; 437 r->Apply = CeedElemRestrictionApply_Magma; 438 r->Destroy = CeedElemRestrictionDestroy_Magma; 439 440 return 0; 441 } 442 443 // Contracts on the middle index 444 // NOTRANSPOSE: V_ajc = T_jb U_abc 445 // TRANSPOSE: V_ajc = T_bj U_abc 446 // If Add != 0, "=" is replaced by "+=" 447 static int CeedTensorContract_Magma(Ceed ceed, 448 CeedInt A, CeedInt B, CeedInt C, CeedInt J, 449 const CeedScalar *t, CeedTransposeMode tmode, 450 const CeedInt Add, 451 const CeedScalar *u, CeedScalar *v) { 452 CeedInt tstride0 = B, tstride1 = 1; 453 if (tmode == CEED_TRANSPOSE) { 454 tstride0 = 1; tstride1 = J; 455 } 456 457 for (CeedInt a=0; a<A; a++) { 458 for (CeedInt j=0; j<J; j++) { 459 if (!Add) { 460 for (CeedInt c=0; c<C; c++) 461 v[(a*J+j)*C+c] = 0; 462 } 463 for (CeedInt b=0; b<B; b++) { 464 for (CeedInt c=0; c<C; c++) { 465 v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 466 } 467 } 468 } 469 } 470 return 0; 471 } 472 473 static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode, 474 CeedEvalMode emode, 475 const CeedScalar *u, CeedScalar *v) { 476 int ierr; 477 const CeedInt dim = basis->dim; 478 const CeedInt ndof = basis->ndof; 479 const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 480 const CeedInt add = (tmode == CEED_TRANSPOSE); 481 482 if (tmode == CEED_TRANSPOSE) { 483 const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 484 for (CeedInt i = 0; i < vsize; i++) 485 v[i] = (CeedScalar) 0; 486 } 487 if (emode & CEED_EVAL_INTERP) { 488 CeedInt P = basis->P1d, Q = basis->Q1d; 489 if (tmode == CEED_TRANSPOSE) { 490 P = basis->Q1d; Q = basis->P1d; 491 } 492 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 493 CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 494 for (CeedInt d=0; d<dim; d++) { 495 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 496 tmode, add&&(d==dim-1), 497 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 498 CeedChk(ierr); 499 pre /= P; 500 post *= Q; 501 } 502 if (tmode == CEED_NOTRANSPOSE) { 503 v += nqpt; 504 } else { 505 u += nqpt; 506 } 507 } 508 if (emode & CEED_EVAL_GRAD) { 509 CeedInt P = basis->P1d, Q = basis->Q1d; 510 // In CEED_NOTRANSPOSE mode: 511 // u is (P^dim x nc), column-major layout (nc = ndof) 512 // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 513 // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 514 if (tmode == CEED_TRANSPOSE) { 515 P = basis->Q1d, Q = basis->P1d; 516 } 517 CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 518 for (CeedInt p = 0; p < dim; p++) { 519 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 520 for (CeedInt d=0; d<dim; d++) { 521 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 522 (p==d)?basis->grad1d:basis->interp1d, 523 tmode, add&&(d==dim-1), 524 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 525 CeedChk(ierr); 526 pre /= P; 527 post *= Q; 528 } 529 if (tmode == CEED_NOTRANSPOSE) { 530 v += nqpt; 531 } else { 532 u += nqpt; 533 } 534 } 535 } 536 if (emode & CEED_EVAL_WEIGHT) { 537 if (tmode == CEED_TRANSPOSE) 538 return CeedError(basis->ceed, 1, 539 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 540 CeedInt Q = basis->Q1d; 541 for (CeedInt d=0; d<dim; d++) { 542 CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 543 for (CeedInt i=0; i<pre; i++) { 544 for (CeedInt j=0; j<Q; j++) { 545 for (CeedInt k=0; k<post; k++) { 546 v[(i*Q + j)*post + k] = basis->qweight1d[j] 547 * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 548 } 549 } 550 } 551 } 552 } 553 return 0; 554 } 555 556 static int CeedBasisDestroy_Magma(CeedBasis basis) { 557 return 0; 558 } 559 560 static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d, 561 CeedInt Q1d, const CeedScalar *interp1d, 562 const CeedScalar *grad1d, 563 const CeedScalar *qref1d, 564 const CeedScalar *qweight1d, 565 CeedBasis basis) { 566 basis->Apply = CeedBasisApply_Magma; 567 basis->Destroy = CeedBasisDestroy_Magma; 568 return 0; 569 } 570 571 static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q, 572 const CeedScalar *const *u, 573 CeedScalar *const *v) { 574 int ierr; 575 ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 576 return 0; 577 } 578 579 static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 580 return 0; 581 } 582 583 static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 584 qf->Apply = CeedQFunctionApply_Magma; 585 qf->Destroy = CeedQFunctionDestroy_Magma; 586 return 0; 587 } 588 589 static int CeedOperatorDestroy_Magma(CeedOperator op) { 590 CeedOperator_Magma *impl = op->data; 591 int ierr; 592 593 ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 594 ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 595 ierr = CeedFree(&op->data); CeedChk(ierr); 596 return 0; 597 } 598 599 static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata, 600 CeedVector ustate, 601 CeedVector residual, CeedRequest *request) { 602 CeedOperator_Magma *impl = op->data; 603 CeedVector etmp; 604 CeedInt Q; 605 const CeedInt nc = op->basis->ndof, dim = op->basis->dim; 606 CeedScalar *Eu; 607 char *qd; 608 int ierr; 609 CeedTransposeMode lmode = CEED_NOTRANSPOSE; 610 611 if (!impl->etmp) { 612 ierr = CeedVectorCreate(op->ceed, 613 nc * op->Erestrict->nelem * op->Erestrict->elemsize, 614 &impl->etmp); CeedChk(ierr); 615 // etmp is allocated when CeedVectorGetArray is called below 616 } 617 etmp = impl->etmp; 618 if (op->qf->inmode & ~CEED_EVAL_WEIGHT) { 619 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, 620 nc, lmode, ustate, etmp, 621 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 622 } 623 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 624 ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 625 ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); 626 CeedChk(ierr); 627 for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 628 CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0}; 629 const CeedScalar *in[5] = {0,0,0,0,0}; 630 // TODO: quadrature weights can be computed just once 631 ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 632 &Eu[e*op->Erestrict->elemsize*nc], BEu); 633 CeedChk(ierr); 634 CeedScalar *u_ptr = BEu, *v_ptr = BEv; 635 if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; } 636 if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; } 637 if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; } 638 if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; } 639 if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; } 640 ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 641 CeedChk(ierr); 642 ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 643 &Eu[e*op->Erestrict->elemsize*nc]); 644 CeedChk(ierr); 645 } 646 ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 647 if (residual) { 648 CeedScalar *res; 649 CeedVectorGetArray(residual, CEED_MEM_HOST, &res); 650 for (int i = 0; i < residual->length; i++) 651 res[i] = (CeedScalar)0; 652 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, 653 nc, lmode, etmp, residual, 654 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 655 } 656 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 657 *request = NULL; 658 return 0; 659 } 660 661 static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) { 662 CeedOperator_Magma *impl = op->data; 663 int ierr; 664 665 if (!impl->qdata) { 666 CeedInt Q; 667 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 668 ierr = CeedVectorCreate(op->ceed, 669 op->Erestrict->nelem * Q 670 * op->qf->qdatasize / sizeof(CeedScalar), 671 &impl->qdata); CeedChk(ierr); 672 } 673 *qdata = impl->qdata; 674 return 0; 675 } 676 677 static int CeedOperatorCreate_Magma(CeedOperator op) { 678 CeedOperator_Magma *impl; 679 int ierr; 680 681 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 682 op->data = impl; 683 op->Destroy = CeedOperatorDestroy_Magma; 684 op->Apply = CeedOperatorApply_Magma; 685 op->GetQData = CeedOperatorGetQData_Magma; 686 return 0; 687 } 688 689 // ***************************************************************************** 690 // * INIT 691 // ***************************************************************************** 692 static int CeedInit_Magma(const char *resource, Ceed ceed) { 693 if (strcmp(resource, "/cpu/magma") 694 && strcmp(resource, "/gpu/magma")) 695 return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 696 697 magma_init(); 698 //magma_print_environment(); 699 700 ceed->VecCreate = CeedVectorCreate_Magma; 701 ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 702 ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 703 ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 704 ceed->OperatorCreate = CeedOperatorCreate_Magma; 705 return 0; 706 } 707 708 // ***************************************************************************** 709 // * REGISTER 710 // ***************************************************************************** 711 __attribute__((constructor)) 712 static void Register(void) { 713 CeedRegister("/cpu/magma", CeedInit_Magma); 714 CeedRegister("/gpu/magma", CeedInit_Magma); 715 } 716