1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed-impl.h> 18 #include <string.h> 19 #include "magma.h" 20 21 typedef struct { 22 CeedScalar *array; 23 CeedScalar *darray; 24 int own_; 25 int down_; 26 } CeedVector_Magma; 27 28 typedef struct { 29 CeedInt *indices; 30 CeedInt *dindices; 31 int own_; 32 int down_; // cover a case where we own Device memory 33 } CeedElemRestriction_Magma; 34 35 typedef struct { 36 CeedVector etmp; 37 CeedVector qdata; 38 } CeedOperator_Magma; 39 40 // ***************************************************************************** 41 // * Initialize vector vec (after free mem) with values from array based on cmode 42 // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 43 // * to array, and data is copied (not store passed pointer) 44 // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 45 // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 46 // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 47 // ***************************************************************************** 48 static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 49 CeedCopyMode cmode, CeedScalar *array) { 50 CeedVector_Magma *impl = vec->data; 51 int ierr; 52 53 // If own data, free the "old" data, e.g., as it may be of different size 54 if (impl->own_){ 55 magma_free( impl->darray ); 56 magma_free_pinned( impl->array ); 57 impl->darray = NULL; 58 impl->array = NULL; 59 impl->own_ = 0; 60 impl->down_= 0; 61 } 62 63 if (mtype == CEED_MEM_HOST) { 64 // memory is on the host; own_ = 0 65 switch (cmode) { 66 case CEED_COPY_VALUES: 67 ierr = magma_malloc( (void**)&impl->darray, 68 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 69 ierr = magma_malloc_pinned( (void**)&impl->array, 70 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 71 impl->own_ = 1; 72 73 if (array != NULL) 74 magma_setvector(vec->length, sizeof(array[0]), 75 array, 1, impl->darray, 1); 76 break; 77 case CEED_OWN_POINTER: 78 ierr = magma_malloc( (void**)&impl->darray, 79 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 80 // TODO: possible problem here is if we are passed non-pinned memory; 81 // (as we own it, lter in destroy, we use free for pinned memory). 82 impl->array = array; 83 impl->own_ = 1; 84 85 if (array != NULL) 86 magma_setvector(vec->length, sizeof(array[0]), 87 array, 1, impl->darray, 1); 88 break; 89 case CEED_USE_POINTER: 90 ierr = magma_malloc( (void**)&impl->darray, 91 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 92 magma_setvector(vec->length, sizeof(array[0]), 93 array, 1, impl->darray, 1); 94 impl->array = array; 95 } 96 } else if (mtype == CEED_MEM_DEVICE) { 97 // memory is on the device; own = 0 98 switch (cmode) { 99 case CEED_COPY_VALUES: 100 ierr = magma_malloc( (void**)&impl->darray, 101 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 102 ierr = magma_malloc_pinned( (void**)&impl->array, 103 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 104 impl->own_ = 1; 105 106 if (array) 107 magma_copyvector(vec->length, sizeof(array[0]), 108 array, 1, impl->darray, 1); 109 break; 110 case CEED_OWN_POINTER: 111 impl->darray = array; 112 ierr = magma_malloc_pinned( (void**)&impl->array, 113 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 114 impl->own_ = 1; 115 116 break; 117 case CEED_USE_POINTER: 118 impl->darray = array; 119 impl->array = NULL; 120 } 121 122 } else 123 return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 124 125 return 0; 126 } 127 128 // ***************************************************************************** 129 // * Give data pointer from vector vec to array (on HOST or DEVICE) 130 // ***************************************************************************** 131 static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 132 CeedScalar **array) { 133 CeedVector_Magma *impl = vec->data; 134 int ierr; 135 136 if (mtype == CEED_MEM_HOST) { 137 if (impl->own_) { 138 // data is owned so GPU had the most up-to-date version; copy it 139 magma_getvector(vec->length, sizeof(*array[0]), 140 impl->darray, 1, impl->array, 1); 141 } else if (impl->array == NULL) { 142 // Vector doesn't own the data and was set on GPU 143 if (impl->darray == NULL) { 144 // call was made just to allocate memory 145 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 146 CeedChk(ierr); 147 } else 148 return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 149 } 150 *array = impl->array; 151 } else if (mtype == CEED_MEM_DEVICE) { 152 if (impl->darray == NULL){ 153 // Vector doesn't own the data and was set on the CPU 154 if (impl->array == NULL) { 155 // call was made just to allocate memory 156 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 157 CeedChk(ierr); 158 } else 159 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 160 } 161 *array = impl->darray; 162 } else 163 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 164 165 return 0; 166 } 167 168 // ***************************************************************************** 169 // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 170 // ***************************************************************************** 171 static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 172 const CeedScalar **array) { 173 CeedVector_Magma *impl = vec->data; 174 int ierr; 175 176 if (mtype == CEED_MEM_HOST) { 177 if (impl->own_) { 178 // data is owned so GPU had the most up-to-date version; copy it 179 magma_getvector(vec->length, sizeof(*array[0]), 180 impl->darray, 1, impl->array, 1); 181 } else if (impl->array == NULL) { 182 // Vector doesn't own the data and was set on GPU 183 if (impl->darray == NULL) { 184 // call was made just to allocate memory 185 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 186 CeedChk(ierr); 187 } else 188 return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 189 } 190 *array = impl->array; 191 } else if (mtype == CEED_MEM_DEVICE) { 192 if (impl->darray == NULL){ 193 // Vector doesn't own the data and was set on the CPU 194 if (impl->array == NULL) { 195 // call was made just to allocate memory 196 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 197 CeedChk(ierr); 198 } else 199 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 200 } 201 *array = impl->darray; 202 } else 203 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 204 205 return 0; 206 } 207 208 // ***************************************************************************** 209 // * There is no mtype here for array so it is not clear if we restore from HOST 210 // * memory or from DEVICE memory. We assume that it is CPU memory because if 211 // * it was GPU memory we would not call this routine at all. 212 // * Restore vector vec with values from array, where array received its values 213 // * from vec and possibly modified them. 214 // ***************************************************************************** 215 static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 216 CeedVector_Magma *impl = vec->data; 217 218 // Check if the array is a CPU pointer 219 if (*array == impl->array) { 220 // Update device, if the device pointer is not NULL 221 if (impl->darray != NULL) { 222 magma_setvector(vec->length, sizeof(*array[0]), 223 *array, 1, impl->darray, 1); 224 } else 225 ; // nothing to do (case of CPU use pointer) 226 227 } else if (impl->down_) { 228 // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 229 magma_getvector(vec->length, sizeof(*array[0]), 230 impl->darray, 1, impl->array, 1); 231 } 232 233 *array = NULL; 234 return 0; 235 } 236 237 // ***************************************************************************** 238 // * There is no mtype here for array so it is not clear if we restore from HOST 239 // * memory or from DEVICE memory. We assume that it is CPU memory because if 240 // * it was GPU memory we would not call this routine at all. 241 // * Restore vector vec with values from array, where array received its values 242 // * from vec to only read them; in this case vec may have been modified meanwhile 243 // * and needs to be restored here. 244 // ***************************************************************************** 245 static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 246 const CeedScalar **array) { 247 CeedVector_Magma *impl = vec->data; 248 249 // Check if the array is a CPU pointer 250 if (*array == impl->array) { 251 // Update device, if the device pointer is not NULL 252 if (impl->darray != NULL) 253 magma_setvector(vec->length, sizeof(*array[0]), 254 *array, 1, impl->darray, 1); 255 else 256 ; // nothing to do (case of CPU use pointer) 257 258 } else if (impl->down_) { 259 // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 260 magma_getvector(vec->length, sizeof(*array[0]), 261 impl->darray, 1, impl->array, 1); 262 } 263 264 *array = NULL; 265 return 0; 266 } 267 268 static int CeedVectorDestroy_Magma(CeedVector vec) { 269 CeedVector_Magma *impl = vec->data; 270 int ierr; 271 272 // Free if we own the data 273 if (impl->own_){ 274 ierr = magma_free_pinned(impl->array); CeedChk(ierr); 275 ierr = magma_free(impl->darray); CeedChk(ierr); 276 } else if (impl->down_){ 277 ierr = magma_free(impl->darray); CeedChk(ierr); 278 } 279 ierr = CeedFree(&vec->data); CeedChk(ierr); 280 return 0; 281 } 282 283 // ***************************************************************************** 284 // * Create vector vec of size n 285 // ***************************************************************************** 286 static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) { 287 CeedVector_Magma *impl; 288 int ierr; 289 290 vec->SetArray = CeedVectorSetArray_Magma; 291 vec->GetArray = CeedVectorGetArray_Magma; 292 vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 293 vec->RestoreArray = CeedVectorRestoreArray_Magma; 294 vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 295 vec->Destroy = CeedVectorDestroy_Magma; 296 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 297 impl->darray = NULL; 298 impl->array = NULL; 299 impl->own_ = 0; 300 impl->down_= 0; 301 vec->data = impl; 302 return 0; 303 } 304 305 // ***************************************************************************** 306 // * Apply restriction operator r to u: v = r(rmode) u 307 // ***************************************************************************** 308 static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 309 CeedTransposeMode tmode, CeedInt ncomp, 310 CeedTransposeMode lmode, CeedVector u, 311 CeedVector v, CeedRequest *request) { 312 CeedElemRestriction_Magma *impl = r->data; 313 int ierr; 314 const CeedScalar *uu; 315 CeedScalar *vv; 316 CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof; 317 CeedInt esize = nelem * elemsize; 318 // CeedInt *indices = impl->indices; 319 CeedInt *dindices = impl->dindices; 320 321 //ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 322 //ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 323 324 // Get pointers on the device 325 ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr); 326 ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr); 327 328 if (tmode == CEED_NOTRANSPOSE) { 329 // Perform: v = r * u 330 if (ncomp == 1) { 331 //for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]]; 332 333 magma_template<<i=0:esize>> 334 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices){ 335 vv[i] = uu[dindices[i]]; 336 } 337 } else { 338 // vv is (elemsize x ncomp x nelem), column-major 339 if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 340 /* 341 for (CeedInt e = 0; e < nelem; e++) 342 for (CeedInt d = 0; d < ncomp; d++) 343 for (CeedInt i=0; i < elemsize; i++) { 344 vv[i + elemsize*(d+ncomp*e)] = 345 uu[indices[i+elemsize*e]+ndof*d]; 346 } 347 */ 348 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 349 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) { 350 vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d]; 351 } 352 } else { // u is (ncomp x ndof), column-major 353 /* 354 for (CeedInt e = 0; e < nelem; e++) 355 for (CeedInt d = 0; d < ncomp; d++) 356 for (CeedInt i=0; i< elemsize; i++) { 357 vv[i + elemsize*(d+ncomp*e)] = 358 uu[d+ncomp*indices[i+elemsize*e]]; 359 } 360 */ 361 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 362 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 363 vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]]; 364 } 365 } 366 } 367 } else { 368 // Note: in transpose mode, we perform: v += r^t * u 369 if (ncomp == 1) { 370 // fprintf(stderr,"3 ---------\n"); 371 // for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i]; 372 magma_template<<i=0:esize>> 373 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 374 magmablas_datomic_add( &vv[dindices[i]], uu[i]); 375 } 376 } else { // u is (elemsize x ncomp x nelem) 377 fprintf(stderr,"2 ---------\n"); 378 379 if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 380 /* 381 for (CeedInt e = 0; e < nelem; e++) 382 for (CeedInt d = 0; d < ncomp; d++) 383 for (CeedInt i=0; i < elemsize; i++) { 384 vv[indices[i + elemsize*e]+ndof*d] += 385 uu[i + elemsize*(d+e*ncomp)]; 386 } 387 */ 388 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 389 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) { 390 magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d], 391 uu[i+iend*(d+e*dend)]); 392 } 393 } else { // vv is (ncomp x ndof), column-major 394 /* 395 for (CeedInt e = 0; e < nelem; e++) 396 for (CeedInt d = 0; d < ncomp; d++) 397 for (CeedInt i=0; i < elemsize; i++) { 398 vv[d+ncomp*indices[i + elemsize*e]] += 399 uu[i + elemsize*(d+e*ncomp)]; 400 } 401 */ 402 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 403 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 404 magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]], 405 uu[i+iend*(d+e*dend)]); 406 } 407 } 408 } 409 } 410 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 411 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 412 413 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 414 *request = NULL; 415 return 0; 416 } 417 418 static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 419 CeedElemRestriction_Magma *impl = r->data; 420 int ierr; 421 422 // Free if we own the data 423 if (impl->own_){ 424 ierr = magma_free_pinned(impl->indices); CeedChk(ierr); 425 ierr = magma_free(impl->dindices); CeedChk(ierr); 426 } 427 else if (impl->down_){ 428 ierr = magma_free(impl->dindices); CeedChk(ierr); 429 } 430 ierr = CeedFree(&r->data); CeedChk(ierr); 431 return 0; 432 } 433 434 static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r, 435 CeedMemType mtype, 436 CeedCopyMode cmode, 437 const CeedInt *indices) { 438 int ierr, size = r->nelem*r->elemsize; 439 CeedElemRestriction_Magma *impl; 440 441 // Allocate memory for the MAGMA Restricton and initializa pointers to NULL 442 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 443 impl->dindices = NULL; 444 impl->indices = NULL; 445 impl->own_ = 0; 446 impl->down_= 0; 447 448 if (mtype == CEED_MEM_HOST) { 449 // memory is on the host; own_ = 0 450 switch (cmode) { 451 case CEED_COPY_VALUES: 452 ierr = magma_malloc( (void**)&impl->dindices, 453 size * sizeof(CeedInt)); CeedChk(ierr); 454 ierr = magma_malloc_pinned( (void**)&impl->indices, 455 size * sizeof(CeedInt)); CeedChk(ierr); 456 impl->own_ = 1; 457 458 if (indices != NULL) 459 magma_setvector(size, sizeof(CeedInt), 460 indices, 1, impl->dindices, 1); 461 break; 462 case CEED_OWN_POINTER: 463 ierr = magma_malloc( (void**)&impl->dindices, 464 size * sizeof(CeedInt)); CeedChk(ierr); 465 // TODO: possible problem here is if we are passed non-pinned memory; 466 // (as we own it, lter in destroy, we use free for pinned memory). 467 impl->indices = (CeedInt *)indices; 468 impl->own_ = 1; 469 470 if (indices != NULL) 471 magma_setvector(size, sizeof(CeedInt), 472 indices, 1, impl->dindices, 1); 473 break; 474 case CEED_USE_POINTER: 475 ierr = magma_malloc( (void**)&impl->dindices, 476 size * sizeof(CeedInt)); CeedChk(ierr); 477 magma_setvector(size, sizeof(CeedInt), 478 indices, 1, impl->dindices, 1); 479 impl->down_ = 1; 480 impl->indices = (CeedInt *)indices; 481 } 482 } else if (mtype == CEED_MEM_DEVICE) { 483 // memory is on the device; own = 0 484 switch (cmode) { 485 case CEED_COPY_VALUES: 486 ierr = magma_malloc( (void**)&impl->dindices, 487 size * sizeof(CeedInt)); CeedChk(ierr); 488 ierr = magma_malloc_pinned( (void**)&impl->indices, 489 size * sizeof(CeedInt)); CeedChk(ierr); 490 impl->own_ = 1; 491 492 if (indices) 493 magma_copyvector(size, sizeof(CeedInt), 494 indices, 1, impl->dindices, 1); 495 break; 496 case CEED_OWN_POINTER: 497 impl->dindices = (CeedInt *)indices; 498 ierr = magma_malloc_pinned( (void**)&impl->indices, 499 size * sizeof(CeedInt)); CeedChk(ierr); 500 impl->own_ = 1; 501 502 break; 503 case CEED_USE_POINTER: 504 impl->dindices = (CeedInt *)indices; 505 impl->indices = NULL; 506 } 507 508 } else 509 return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported"); 510 511 r->data = impl; 512 r->Apply = CeedElemRestrictionApply_Magma; 513 r->Destroy = CeedElemRestrictionDestroy_Magma; 514 515 return 0; 516 } 517 518 // Contracts on the middle index 519 // NOTRANSPOSE: V_ajc = T_jb U_abc 520 // TRANSPOSE: V_ajc = T_bj U_abc 521 // If Add != 0, "=" is replaced by "+=" 522 static int CeedTensorContract_Magma(Ceed ceed, 523 CeedInt A, CeedInt B, CeedInt C, CeedInt J, 524 const CeedScalar *t, CeedTransposeMode tmode, 525 const CeedInt Add, 526 const CeedScalar *u, CeedScalar *v) { 527 CeedInt tstride0 = B, tstride1 = 1; 528 if (tmode == CEED_TRANSPOSE) { 529 tstride0 = 1; tstride1 = J; 530 } 531 532 for (CeedInt a=0; a<A; a++) { 533 for (CeedInt j=0; j<J; j++) { 534 if (!Add) { 535 for (CeedInt c=0; c<C; c++) 536 v[(a*J+j)*C+c] = 0; 537 } 538 for (CeedInt b=0; b<B; b++) { 539 for (CeedInt c=0; c<C; c++) { 540 v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 541 } 542 } 543 } 544 } 545 return 0; 546 } 547 548 static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode, 549 CeedEvalMode emode, 550 const CeedScalar *u, CeedScalar *v) { 551 int ierr; 552 const CeedInt dim = basis->dim; 553 const CeedInt ndof = basis->ndof; 554 const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 555 const CeedInt add = (tmode == CEED_TRANSPOSE); 556 557 if (tmode == CEED_TRANSPOSE) { 558 const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 559 for (CeedInt i = 0; i < vsize; i++) 560 v[i] = (CeedScalar) 0; 561 } 562 if (emode & CEED_EVAL_INTERP) { 563 CeedInt P = basis->P1d, Q = basis->Q1d; 564 if (tmode == CEED_TRANSPOSE) { 565 P = basis->Q1d; Q = basis->P1d; 566 } 567 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 568 CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 569 for (CeedInt d=0; d<dim; d++) { 570 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 571 tmode, add&&(d==dim-1), 572 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 573 CeedChk(ierr); 574 pre /= P; 575 post *= Q; 576 } 577 if (tmode == CEED_NOTRANSPOSE) { 578 v += nqpt; 579 } else { 580 u += nqpt; 581 } 582 } 583 if (emode & CEED_EVAL_GRAD) { 584 CeedInt P = basis->P1d, Q = basis->Q1d; 585 // In CEED_NOTRANSPOSE mode: 586 // u is (P^dim x nc), column-major layout (nc = ndof) 587 // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 588 // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 589 if (tmode == CEED_TRANSPOSE) { 590 P = basis->Q1d, Q = basis->P1d; 591 } 592 CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 593 for (CeedInt p = 0; p < dim; p++) { 594 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 595 for (CeedInt d=0; d<dim; d++) { 596 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 597 (p==d)?basis->grad1d:basis->interp1d, 598 tmode, add&&(d==dim-1), 599 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 600 CeedChk(ierr); 601 pre /= P; 602 post *= Q; 603 } 604 if (tmode == CEED_NOTRANSPOSE) { 605 v += nqpt; 606 } else { 607 u += nqpt; 608 } 609 } 610 } 611 if (emode & CEED_EVAL_WEIGHT) { 612 if (tmode == CEED_TRANSPOSE) 613 return CeedError(basis->ceed, 1, 614 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 615 CeedInt Q = basis->Q1d; 616 for (CeedInt d=0; d<dim; d++) { 617 CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 618 for (CeedInt i=0; i<pre; i++) { 619 for (CeedInt j=0; j<Q; j++) { 620 for (CeedInt k=0; k<post; k++) { 621 v[(i*Q + j)*post + k] = basis->qweight1d[j] 622 * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 623 } 624 } 625 } 626 } 627 } 628 return 0; 629 } 630 631 static int CeedBasisDestroy_Magma(CeedBasis basis) { 632 return 0; 633 } 634 635 static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d, 636 CeedInt Q1d, const CeedScalar *interp1d, 637 const CeedScalar *grad1d, 638 const CeedScalar *qref1d, 639 const CeedScalar *qweight1d, 640 CeedBasis basis) { 641 basis->Apply = CeedBasisApply_Magma; 642 basis->Destroy = CeedBasisDestroy_Magma; 643 return 0; 644 } 645 646 static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q, 647 const CeedScalar *const *u, 648 CeedScalar *const *v) { 649 int ierr; 650 ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 651 return 0; 652 } 653 654 static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 655 return 0; 656 } 657 658 static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 659 qf->Apply = CeedQFunctionApply_Magma; 660 qf->Destroy = CeedQFunctionDestroy_Magma; 661 return 0; 662 } 663 664 static int CeedOperatorDestroy_Magma(CeedOperator op) { 665 CeedOperator_Magma *impl = op->data; 666 int ierr; 667 668 ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 669 ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 670 ierr = CeedFree(&op->data); CeedChk(ierr); 671 return 0; 672 } 673 674 static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata, 675 CeedVector ustate, 676 CeedVector residual, CeedRequest *request) { 677 CeedOperator_Magma *impl = op->data; 678 CeedVector etmp; 679 CeedInt Q; 680 const CeedInt nc = op->basis->ndof, dim = op->basis->dim; 681 CeedScalar *Eu; 682 char *qd; 683 int ierr; 684 CeedTransposeMode lmode = CEED_NOTRANSPOSE; 685 686 if (!impl->etmp) { 687 ierr = CeedVectorCreate(op->ceed, 688 nc * op->Erestrict->nelem * op->Erestrict->elemsize, 689 &impl->etmp); CeedChk(ierr); 690 // etmp is allocated when CeedVectorGetArray is called below 691 } 692 etmp = impl->etmp; 693 if (op->qf->inmode & ~CEED_EVAL_WEIGHT) { 694 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, 695 nc, lmode, ustate, etmp, 696 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 697 } 698 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 699 ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 700 ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); 701 CeedChk(ierr); 702 for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 703 CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0}; 704 const CeedScalar *in[5] = {0,0,0,0,0}; 705 // TODO: quadrature weights can be computed just once 706 ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 707 &Eu[e*op->Erestrict->elemsize*nc], BEu); 708 CeedChk(ierr); 709 CeedScalar *u_ptr = BEu, *v_ptr = BEv; 710 if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; } 711 if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; } 712 if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; } 713 if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; } 714 if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; } 715 ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 716 CeedChk(ierr); 717 ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 718 &Eu[e*op->Erestrict->elemsize*nc]); 719 CeedChk(ierr); 720 } 721 ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 722 if (residual) { 723 CeedScalar *res; 724 CeedVectorGetArray(residual, CEED_MEM_HOST, &res); 725 for (int i = 0; i < residual->length; i++) 726 res[i] = (CeedScalar)0; 727 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, 728 nc, lmode, etmp, residual, 729 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 730 } 731 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 732 *request = NULL; 733 return 0; 734 } 735 736 static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) { 737 CeedOperator_Magma *impl = op->data; 738 int ierr; 739 740 if (!impl->qdata) { 741 CeedInt Q; 742 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 743 ierr = CeedVectorCreate(op->ceed, 744 op->Erestrict->nelem * Q 745 * op->qf->qdatasize / sizeof(CeedScalar), 746 &impl->qdata); CeedChk(ierr); 747 } 748 *qdata = impl->qdata; 749 return 0; 750 } 751 752 static int CeedOperatorCreate_Magma(CeedOperator op) { 753 CeedOperator_Magma *impl; 754 int ierr; 755 756 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 757 op->data = impl; 758 op->Destroy = CeedOperatorDestroy_Magma; 759 op->Apply = CeedOperatorApply_Magma; 760 op->GetQData = CeedOperatorGetQData_Magma; 761 return 0; 762 } 763 764 // ***************************************************************************** 765 // * INIT 766 // ***************************************************************************** 767 static int CeedInit_Magma(const char *resource, Ceed ceed) { 768 if (strcmp(resource, "/cpu/magma") 769 && strcmp(resource, "/gpu/magma")) 770 return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 771 772 magma_init(); 773 //magma_print_environment(); 774 775 ceed->VecCreate = CeedVectorCreate_Magma; 776 ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 777 ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 778 ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 779 ceed->OperatorCreate = CeedOperatorCreate_Magma; 780 return 0; 781 } 782 783 // ***************************************************************************** 784 // * REGISTER 785 // ***************************************************************************** 786 __attribute__((constructor)) 787 static void Register(void) { 788 CeedRegister("/cpu/magma", CeedInit_Magma); 789 CeedRegister("/gpu/magma", CeedInit_Magma); 790 } 791