1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed-impl.h> 18 #include <string.h> 19 #include "magma.h" 20 #include "ceed-magma.h" 21 22 typedef struct { 23 CeedScalar *array; 24 CeedScalar *darray; 25 int own_; 26 int down_; 27 } CeedVector_Magma; 28 29 typedef struct { 30 CeedInt *indices; 31 CeedInt *dindices; 32 int own_; 33 int down_; // cover a case where we own Device memory 34 } CeedElemRestriction_Magma; 35 36 typedef struct { 37 CeedVector etmp; 38 CeedVector qdata; 39 } CeedOperator_Magma; 40 41 // ***************************************************************************** 42 // * Initialize vector vec (after free mem) with values from array based on cmode 43 // * CEED_COPY_VALUES: memory is allocated in vec->array_allocated, made equal 44 // * to array, and data is copied (not store passed pointer) 45 // * CEED_OWN_POINTER: vec->data->array_allocated and vec->data->array = array 46 // * CEED_USE_POINTER: vec->data->array = array (can modify; no ownership) 47 // * mtype: CEED_MEM_HOST or CEED_MEM_DEVICE 48 // ***************************************************************************** 49 static int CeedVectorSetArray_Magma(CeedVector vec, CeedMemType mtype, 50 CeedCopyMode cmode, CeedScalar *array) { 51 CeedVector_Magma *impl = vec->data; 52 int ierr; 53 54 // If own data, free the "old" data, e.g., as it may be of different size 55 if (impl->own_){ 56 magma_free( impl->darray ); 57 magma_free_pinned( impl->array ); 58 impl->darray = NULL; 59 impl->array = NULL; 60 impl->own_ = 0; 61 impl->down_= 0; 62 } 63 64 if (mtype == CEED_MEM_HOST) { 65 // memory is on the host; own_ = 0 66 switch (cmode) { 67 case CEED_COPY_VALUES: 68 ierr = magma_malloc( (void**)&impl->darray, 69 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 70 ierr = magma_malloc_pinned( (void**)&impl->array, 71 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 72 impl->own_ = 1; 73 74 if (array != NULL) 75 magma_setvector(vec->length, sizeof(array[0]), 76 array, 1, impl->darray, 1); 77 break; 78 case CEED_OWN_POINTER: 79 ierr = magma_malloc( (void**)&impl->darray, 80 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 81 // TODO: possible problem here is if we are passed non-pinned memory; 82 // (as we own it, lter in destroy, we use free for pinned memory). 83 impl->array = array; 84 impl->own_ = 1; 85 86 if (array != NULL) 87 magma_setvector(vec->length, sizeof(array[0]), 88 array, 1, impl->darray, 1); 89 break; 90 case CEED_USE_POINTER: 91 ierr = magma_malloc( (void**)&impl->darray, 92 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 93 magma_setvector(vec->length, sizeof(array[0]), 94 array, 1, impl->darray, 1); 95 impl->array = array; 96 } 97 } else if (mtype == CEED_MEM_DEVICE) { 98 // memory is on the device; own = 0 99 switch (cmode) { 100 case CEED_COPY_VALUES: 101 ierr = magma_malloc( (void**)&impl->darray, 102 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 103 ierr = magma_malloc_pinned( (void**)&impl->array, 104 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 105 impl->own_ = 1; 106 107 if (array) 108 magma_copyvector(vec->length, sizeof(array[0]), 109 array, 1, impl->darray, 1); 110 break; 111 case CEED_OWN_POINTER: 112 impl->darray = array; 113 ierr = magma_malloc_pinned( (void**)&impl->array, 114 vec->length * sizeof(CeedScalar)); CeedChk(ierr); 115 impl->own_ = 1; 116 117 break; 118 case CEED_USE_POINTER: 119 impl->darray = array; 120 impl->array = NULL; 121 } 122 123 } else 124 return CeedError(vec->ceed, 1, "Only MemType = HOST or DEVICE supported"); 125 126 return 0; 127 } 128 129 // ***************************************************************************** 130 // * Give data pointer from vector vec to array (on HOST or DEVICE) 131 // ***************************************************************************** 132 static int CeedVectorGetArray_Magma(CeedVector vec, CeedMemType mtype, 133 CeedScalar **array) { 134 CeedVector_Magma *impl = vec->data; 135 int ierr; 136 137 if (mtype == CEED_MEM_HOST) { 138 if (impl->own_) { 139 // data is owned so GPU had the most up-to-date version; copy it 140 magma_getvector(vec->length, sizeof(*array[0]), 141 impl->darray, 1, impl->array, 1); 142 } else if (impl->array == NULL) { 143 // Vector doesn't own the data and was set on GPU 144 if (impl->darray == NULL) { 145 // call was made just to allocate memory 146 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 147 CeedChk(ierr); 148 } else 149 return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 150 } 151 *array = impl->array; 152 } else if (mtype == CEED_MEM_DEVICE) { 153 if (impl->darray == NULL){ 154 // Vector doesn't own the data and was set on the CPU 155 if (impl->array == NULL) { 156 // call was made just to allocate memory 157 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 158 CeedChk(ierr); 159 } else 160 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 161 } 162 *array = impl->darray; 163 } else 164 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 165 166 return 0; 167 } 168 169 // ***************************************************************************** 170 // * Give data pointer from vector vec to array (on HOST or DEVICE) to read it 171 // ***************************************************************************** 172 static int CeedVectorGetArrayRead_Magma(CeedVector vec, CeedMemType mtype, 173 const CeedScalar **array) { 174 CeedVector_Magma *impl = vec->data; 175 int ierr; 176 177 if (mtype == CEED_MEM_HOST) { 178 if (impl->own_) { 179 // data is owned so GPU had the most up-to-date version; copy it 180 magma_getvector(vec->length, sizeof(*array[0]), 181 impl->darray, 1, impl->array, 1); 182 } else if (impl->array == NULL) { 183 // Vector doesn't own the data and was set on GPU 184 if (impl->darray == NULL) { 185 // call was made just to allocate memory 186 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 187 CeedChk(ierr); 188 } else 189 return CeedError(vec->ceed, 1, "Can not access DEVICE vector on HOST"); 190 } 191 *array = impl->array; 192 } else if (mtype == CEED_MEM_DEVICE) { 193 if (impl->darray == NULL){ 194 // Vector doesn't own the data and was set on the CPU 195 if (impl->array == NULL) { 196 // call was made just to allocate memory 197 ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); 198 CeedChk(ierr); 199 } else 200 return CeedError(vec->ceed, 1, "Can not access HOST vector on DEVICE"); 201 } 202 *array = impl->darray; 203 } else 204 return CeedError(vec->ceed, 1, "Can only provide to HOST or DEVICE memory"); 205 206 return 0; 207 } 208 209 // ***************************************************************************** 210 // * There is no mtype here for array so it is not clear if we restore from HOST 211 // * memory or from DEVICE memory. We assume that it is CPU memory because if 212 // * it was GPU memory we would not call this routine at all. 213 // * Restore vector vec with values from array, where array received its values 214 // * from vec and possibly modified them. 215 // ***************************************************************************** 216 static int CeedVectorRestoreArray_Magma(CeedVector vec, CeedScalar **array) { 217 CeedVector_Magma *impl = vec->data; 218 219 // Check if the array is a CPU pointer 220 if (*array == impl->array) { 221 // Update device, if the device pointer is not NULL 222 if (impl->darray != NULL) { 223 magma_setvector(vec->length, sizeof(*array[0]), 224 *array, 1, impl->darray, 1); 225 } else 226 ; // nothing to do (case of CPU use pointer) 227 228 } else if (impl->down_) { 229 // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 230 magma_getvector(vec->length, sizeof(*array[0]), 231 impl->darray, 1, impl->array, 1); 232 } 233 234 *array = NULL; 235 return 0; 236 } 237 238 // ***************************************************************************** 239 // * There is no mtype here for array so it is not clear if we restore from HOST 240 // * memory or from DEVICE memory. We assume that it is CPU memory because if 241 // * it was GPU memory we would not call this routine at all. 242 // * Restore vector vec with values from array, where array received its values 243 // * from vec to only read them; in this case vec may have been modified meanwhile 244 // * and needs to be restored here. 245 // ***************************************************************************** 246 static int CeedVectorRestoreArrayRead_Magma(CeedVector vec, 247 const CeedScalar **array) { 248 CeedVector_Magma *impl = vec->data; 249 250 // Check if the array is a CPU pointer 251 if (*array == impl->array) { 252 // Update device, if the device pointer is not NULL 253 if (impl->darray != NULL) 254 magma_setvector(vec->length, sizeof(*array[0]), 255 *array, 1, impl->darray, 1); 256 else 257 ; // nothing to do (case of CPU use pointer) 258 259 } else if (impl->down_) { 260 // nothing to do if array is on GPU, except if down_=1(case CPU use pointer) 261 magma_getvector(vec->length, sizeof(*array[0]), 262 impl->darray, 1, impl->array, 1); 263 } 264 265 *array = NULL; 266 return 0; 267 } 268 269 static int CeedVectorDestroy_Magma(CeedVector vec) { 270 CeedVector_Magma *impl = vec->data; 271 int ierr; 272 273 // Free if we own the data 274 if (impl->own_){ 275 ierr = magma_free_pinned(impl->array); CeedChk(ierr); 276 ierr = magma_free(impl->darray); CeedChk(ierr); 277 } else if (impl->down_){ 278 ierr = magma_free(impl->darray); CeedChk(ierr); 279 } 280 ierr = CeedFree(&vec->data); CeedChk(ierr); 281 return 0; 282 } 283 284 // ***************************************************************************** 285 // * Create vector vec of size n 286 // ***************************************************************************** 287 static int CeedVectorCreate_Magma(Ceed ceed, CeedInt n, CeedVector vec) { 288 CeedVector_Magma *impl; 289 int ierr; 290 291 vec->SetArray = CeedVectorSetArray_Magma; 292 vec->GetArray = CeedVectorGetArray_Magma; 293 vec->GetArrayRead = CeedVectorGetArrayRead_Magma; 294 vec->RestoreArray = CeedVectorRestoreArray_Magma; 295 vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Magma; 296 vec->Destroy = CeedVectorDestroy_Magma; 297 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 298 impl->darray = NULL; 299 impl->array = NULL; 300 impl->own_ = 0; 301 impl->down_= 0; 302 vec->data = impl; 303 return 0; 304 } 305 306 // ***************************************************************************** 307 // * Apply restriction operator r to u: v = r(rmode) u 308 // ***************************************************************************** 309 static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, 310 CeedTransposeMode tmode, CeedInt ncomp, 311 CeedTransposeMode lmode, CeedVector u, 312 CeedVector v, CeedRequest *request) { 313 CeedElemRestriction_Magma *impl = r->data; 314 int ierr; 315 const CeedScalar *uu; 316 CeedScalar *vv; 317 CeedInt nelem = r->nelem, elemsize = r->elemsize, ndof = r->ndof; 318 CeedInt esize = nelem * elemsize; 319 // CeedInt *indices = impl->indices; 320 CeedInt *dindices = impl->dindices; 321 322 //ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 323 //ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 324 325 // Get pointers on the device 326 ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &uu); CeedChk(ierr); 327 ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &vv); CeedChk(ierr); 328 329 if (tmode == CEED_NOTRANSPOSE) { 330 // Perform: v = r * u 331 if (ncomp == 1) { 332 //for (CeedInt i=0; i<esize; i++) vv[i] = uu[indices[i]]; 333 334 magma_template<<i=0:esize>> 335 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices){ 336 vv[i] = uu[dindices[i]]; 337 } 338 } else { 339 // vv is (elemsize x ncomp x nelem), column-major 340 if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 341 /* 342 for (CeedInt e = 0; e < nelem; e++) 343 for (CeedInt d = 0; d < ncomp; d++) 344 for (CeedInt i=0; i < elemsize; i++) { 345 vv[i + elemsize*(d+ncomp*e)] = 346 uu[indices[i+elemsize*e]+ndof*d]; 347 } 348 */ 349 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 350 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, int ndof) { 351 vv[i + iend*(d+dend*e)] = uu[dindices[i+iend*e]+ndof*d]; 352 } 353 } else { // u is (ncomp x ndof), column-major 354 /* 355 for (CeedInt e = 0; e < nelem; e++) 356 for (CeedInt d = 0; d < ncomp; d++) 357 for (CeedInt i=0; i< elemsize; i++) { 358 vv[i + elemsize*(d+ncomp*e)] = 359 uu[d+ncomp*indices[i+elemsize*e]]; 360 } 361 */ 362 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 363 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 364 vv[i + iend*(d+dend*e)] = uu[d+dend*dindices[i + iend*e]]; 365 } 366 } 367 } 368 } else { 369 // Note: in transpose mode, we perform: v += r^t * u 370 if (ncomp == 1) { 371 // fprintf(stderr,"3 ---------\n"); 372 // for (CeedInt i=0; i<esize; i++) vv[indices[i]] += uu[i]; 373 magma_template<<i=0:esize>> 374 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 375 magmablas_datomic_add( &vv[dindices[i]], uu[i]); 376 } 377 } else { // u is (elemsize x ncomp x nelem) 378 fprintf(stderr,"2 ---------\n"); 379 380 if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 381 /* 382 for (CeedInt e = 0; e < nelem; e++) 383 for (CeedInt d = 0; d < ncomp; d++) 384 for (CeedInt i=0; i < elemsize; i++) { 385 vv[indices[i + elemsize*e]+ndof*d] += 386 uu[i + elemsize*(d+e*ncomp)]; 387 } 388 */ 389 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 390 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices, CeedInt ndof) { 391 magmablas_datomic_add( &vv[dindices[i+iend*e]+ndof*d], 392 uu[i+iend*(d+e*dend)]); 393 } 394 } else { // vv is (ncomp x ndof), column-major 395 /* 396 for (CeedInt e = 0; e < nelem; e++) 397 for (CeedInt d = 0; d < ncomp; d++) 398 for (CeedInt i=0; i < elemsize; i++) { 399 vv[d+ncomp*indices[i + elemsize*e]] += 400 uu[i + elemsize*(d+e*ncomp)]; 401 } 402 */ 403 magma_template<<e=0:nelem, d=0:ncomp, i=0:elemsize>> 404 (const CeedScalar *uu, CeedScalar *vv, CeedInt *dindices) { 405 magmablas_datomic_add( &vv[d+dend*dindices[i + iend*e]], 406 uu[i+iend*(d+e*dend)]); 407 } 408 } 409 } 410 } 411 ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 412 ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 413 414 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 415 *request = NULL; 416 return 0; 417 } 418 419 static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { 420 CeedElemRestriction_Magma *impl = r->data; 421 int ierr; 422 423 // Free if we own the data 424 if (impl->own_){ 425 ierr = magma_free_pinned(impl->indices); CeedChk(ierr); 426 ierr = magma_free(impl->dindices); CeedChk(ierr); 427 } 428 else if (impl->down_){ 429 ierr = magma_free(impl->dindices); CeedChk(ierr); 430 } 431 ierr = CeedFree(&r->data); CeedChk(ierr); 432 return 0; 433 } 434 435 static int CeedElemRestrictionCreate_Magma(CeedElemRestriction r, 436 CeedMemType mtype, 437 CeedCopyMode cmode, 438 const CeedInt *indices) { 439 int ierr, size = r->nelem*r->elemsize; 440 CeedElemRestriction_Magma *impl; 441 442 // Allocate memory for the MAGMA Restricton and initializa pointers to NULL 443 ierr = CeedCalloc(1,&impl); CeedChk(ierr); 444 impl->dindices = NULL; 445 impl->indices = NULL; 446 impl->own_ = 0; 447 impl->down_= 0; 448 449 if (mtype == CEED_MEM_HOST) { 450 // memory is on the host; own_ = 0 451 switch (cmode) { 452 case CEED_COPY_VALUES: 453 ierr = magma_malloc( (void**)&impl->dindices, 454 size * sizeof(CeedInt)); CeedChk(ierr); 455 ierr = magma_malloc_pinned( (void**)&impl->indices, 456 size * sizeof(CeedInt)); CeedChk(ierr); 457 impl->own_ = 1; 458 459 if (indices != NULL) 460 magma_setvector(size, sizeof(CeedInt), 461 indices, 1, impl->dindices, 1); 462 break; 463 case CEED_OWN_POINTER: 464 ierr = magma_malloc( (void**)&impl->dindices, 465 size * sizeof(CeedInt)); CeedChk(ierr); 466 // TODO: possible problem here is if we are passed non-pinned memory; 467 // (as we own it, lter in destroy, we use free for pinned memory). 468 impl->indices = (CeedInt *)indices; 469 impl->own_ = 1; 470 471 if (indices != NULL) 472 magma_setvector(size, sizeof(CeedInt), 473 indices, 1, impl->dindices, 1); 474 break; 475 case CEED_USE_POINTER: 476 ierr = magma_malloc( (void**)&impl->dindices, 477 size * sizeof(CeedInt)); CeedChk(ierr); 478 magma_setvector(size, sizeof(CeedInt), 479 indices, 1, impl->dindices, 1); 480 impl->down_ = 1; 481 impl->indices = (CeedInt *)indices; 482 } 483 } else if (mtype == CEED_MEM_DEVICE) { 484 // memory is on the device; own = 0 485 switch (cmode) { 486 case CEED_COPY_VALUES: 487 ierr = magma_malloc( (void**)&impl->dindices, 488 size * sizeof(CeedInt)); CeedChk(ierr); 489 ierr = magma_malloc_pinned( (void**)&impl->indices, 490 size * sizeof(CeedInt)); CeedChk(ierr); 491 impl->own_ = 1; 492 493 if (indices) 494 magma_copyvector(size, sizeof(CeedInt), 495 indices, 1, impl->dindices, 1); 496 break; 497 case CEED_OWN_POINTER: 498 impl->dindices = (CeedInt *)indices; 499 ierr = magma_malloc_pinned( (void**)&impl->indices, 500 size * sizeof(CeedInt)); CeedChk(ierr); 501 impl->own_ = 1; 502 503 break; 504 case CEED_USE_POINTER: 505 impl->dindices = (CeedInt *)indices; 506 impl->indices = NULL; 507 } 508 509 } else 510 return CeedError(r->ceed, 1, "Only MemType = HOST or DEVICE supported"); 511 512 r->data = impl; 513 r->Apply = CeedElemRestrictionApply_Magma; 514 r->Destroy = CeedElemRestrictionDestroy_Magma; 515 516 return 0; 517 } 518 519 // Contracts on the middle index 520 // NOTRANSPOSE: V_ajc = T_jb U_abc 521 // TRANSPOSE: V_ajc = T_bj U_abc 522 // If Add != 0, "=" is replaced by "+=" 523 static int CeedTensorContract_Magma(Ceed ceed, 524 CeedInt A, CeedInt B, CeedInt C, CeedInt J, 525 const CeedScalar *t, CeedTransposeMode tmode, 526 const CeedInt Add, 527 const CeedScalar *u, CeedScalar *v) { 528 #ifdef USE_MAGMA_BATCH 529 magma_dtensor_contract(ceed, A, B, C, J, t, tmode, Add, u, v); 530 #else 531 CeedInt tstride0 = B, tstride1 = 1; 532 if (tmode == CEED_TRANSPOSE) { 533 tstride0 = 1; tstride1 = J; 534 } 535 CeedDebug("\033[31m[CeedTensorContract] A=%d, J=%d, C=%d, B=%d: %d %d %d", 536 A,J,C,B,A*J*B*C, C*J*A, C*B*A); 537 for (CeedInt a=0; a<A; a++) { 538 for (CeedInt j=0; j<J; j++) { 539 if (!Add) { 540 for (CeedInt c=0; c<C; c++) 541 v[(a*J+j)*C+c] = 0; 542 } 543 for (CeedInt b=0; b<B; b++) { 544 for (CeedInt c=0; c<C; c++) { 545 v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 546 } 547 } 548 } 549 } 550 #endif 551 return 0; 552 } 553 554 static int CeedBasisApply_Magma(CeedBasis basis, CeedTransposeMode tmode, 555 CeedEvalMode emode, 556 const CeedScalar *u, CeedScalar *v) { 557 int ierr; 558 const CeedInt dim = basis->dim; 559 const CeedInt ndof = basis->ndof; 560 const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 561 const CeedInt add = (tmode == CEED_TRANSPOSE); 562 563 CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d", 564 ndof*CeedPowInt(basis->P1d, dim)); 565 566 if (tmode == CEED_TRANSPOSE) { 567 const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 568 for (CeedInt i = 0; i < vsize; i++) 569 v[i] = (CeedScalar) 0; 570 } 571 if (emode & CEED_EVAL_INTERP) { 572 CeedInt P = basis->P1d, Q = basis->Q1d; 573 if (tmode == CEED_TRANSPOSE) { 574 P = basis->Q1d; Q = basis->P1d; 575 } 576 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 577 CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 578 CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 579 ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)); 580 for (CeedInt d=0; d<dim; d++) { 581 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, basis->interp1d, 582 tmode, add&&(d==dim-1), 583 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 584 CeedChk(ierr); 585 pre /= P; 586 post *= Q; 587 } 588 if (tmode == CEED_NOTRANSPOSE) { 589 v += nqpt; 590 } else { 591 u += nqpt; 592 } 593 } 594 if (emode & CEED_EVAL_GRAD) { 595 CeedInt P = basis->P1d, Q = basis->Q1d; 596 // In CEED_NOTRANSPOSE mode: 597 // u is (P^dim x nc), column-major layout (nc = ndof) 598 // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 599 // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 600 if (tmode == CEED_TRANSPOSE) { 601 P = basis->Q1d, Q = basis->P1d; 602 } 603 CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 604 CeedDebug("\033[01m[CeedBasisApply_Magma] tmpsize = %d", 605 ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)); 606 for (CeedInt p = 0; p < dim; p++) { 607 CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 608 for (CeedInt d=0; d<dim; d++) { 609 ierr = CeedTensorContract_Magma(basis->ceed, pre, P, post, Q, 610 (p==d)?basis->grad1d:basis->interp1d, 611 tmode, add&&(d==dim-1), 612 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 613 CeedChk(ierr); 614 pre /= P; 615 post *= Q; 616 } 617 if (tmode == CEED_NOTRANSPOSE) { 618 v += nqpt; 619 } else { 620 u += nqpt; 621 } 622 } 623 } 624 if (emode & CEED_EVAL_WEIGHT) { 625 if (tmode == CEED_TRANSPOSE) 626 return CeedError(basis->ceed, 1, 627 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 628 CeedInt Q = basis->Q1d; 629 for (CeedInt d=0; d<dim; d++) { 630 CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 631 for (CeedInt i=0; i<pre; i++) { 632 for (CeedInt j=0; j<Q; j++) { 633 for (CeedInt k=0; k<post; k++) { 634 v[(i*Q + j)*post + k] = basis->qweight1d[j] 635 * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 636 } 637 } 638 } 639 } 640 } 641 return 0; 642 } 643 644 static int CeedBasisDestroy_Magma(CeedBasis basis) { 645 return 0; 646 } 647 648 static int CeedBasisCreateTensorH1_Magma(Ceed ceed, CeedInt dim, CeedInt P1d, 649 CeedInt Q1d, const CeedScalar *interp1d, 650 const CeedScalar *grad1d, 651 const CeedScalar *qref1d, 652 const CeedScalar *qweight1d, 653 CeedBasis basis) { 654 basis->Apply = CeedBasisApply_Magma; 655 basis->Destroy = CeedBasisDestroy_Magma; 656 return 0; 657 } 658 659 static int CeedQFunctionApply_Magma(CeedQFunction qf, void *qdata, CeedInt Q, 660 const CeedScalar *const *u, 661 CeedScalar *const *v) { 662 int ierr; 663 ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr); 664 return 0; 665 } 666 667 static int CeedQFunctionDestroy_Magma(CeedQFunction qf) { 668 return 0; 669 } 670 671 static int CeedQFunctionCreate_Magma(CeedQFunction qf) { 672 qf->Apply = CeedQFunctionApply_Magma; 673 qf->Destroy = CeedQFunctionDestroy_Magma; 674 return 0; 675 } 676 677 static int CeedOperatorDestroy_Magma(CeedOperator op) { 678 CeedOperator_Magma *impl = op->data; 679 int ierr; 680 681 ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr); 682 ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr); 683 ierr = CeedFree(&op->data); CeedChk(ierr); 684 return 0; 685 } 686 687 static int CeedOperatorApply_Magma(CeedOperator op, CeedVector qdata, 688 CeedVector ustate, 689 CeedVector residual, CeedRequest *request) { 690 CeedOperator_Magma *impl = op->data; 691 CeedVector etmp; 692 CeedInt Q; 693 const CeedInt nc = op->basis->ndof, dim = op->basis->dim; 694 CeedScalar *Eu; 695 char *qd; 696 int ierr; 697 CeedTransposeMode lmode = CEED_NOTRANSPOSE; 698 699 if (!impl->etmp) { 700 ierr = CeedVectorCreate(op->ceed, 701 nc * op->Erestrict->nelem * op->Erestrict->elemsize, 702 &impl->etmp); CeedChk(ierr); 703 // etmp is allocated when CeedVectorGetArray is called below 704 } 705 etmp = impl->etmp; 706 if (op->qf->inmode & ~CEED_EVAL_WEIGHT) { 707 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, 708 nc, lmode, ustate, etmp, 709 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 710 } 711 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 712 ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr); 713 ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); 714 CeedChk(ierr); 715 716 for (CeedInt e=0; e<op->Erestrict->nelem; e++) { 717 CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0}; 718 const CeedScalar *in[5] = {0,0,0,0,0}; 719 // TODO: quadrature weights can be computed just once 720 CeedDebug("\033[11m[CeedOperatorApply_Magma] e=%d: Eu+%d, %d", 721 e, e*op->Erestrict->elemsize*nc, Q*nc*(dim+2)); 722 ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode, 723 &Eu[e*op->Erestrict->elemsize*nc], BEu); 724 CeedChk(ierr); 725 CeedScalar *u_ptr = BEu, *v_ptr = BEv; 726 if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; } 727 if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; } 728 if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; } 729 if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; } 730 if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; } 731 ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out); 732 CeedChk(ierr); 733 CeedDebug("\033[31m[CeedOperatorApply_Magma] e=%d: ",e); 734 ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv, 735 &Eu[e*op->Erestrict->elemsize*nc]); 736 CeedChk(ierr); 737 } 738 ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr); 739 if (residual) { 740 CeedScalar *res; 741 CeedVectorGetArray(residual, CEED_MEM_HOST, &res); 742 for (int i = 0; i < residual->length; i++) 743 res[i] = (CeedScalar)0; 744 ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, 745 nc, lmode, etmp, residual, 746 CEED_REQUEST_IMMEDIATE); CeedChk(ierr); 747 } 748 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 749 *request = NULL; 750 return 0; 751 } 752 753 static int CeedOperatorGetQData_Magma(CeedOperator op, CeedVector *qdata) { 754 CeedOperator_Magma *impl = op->data; 755 int ierr; 756 757 if (!impl->qdata) { 758 CeedInt Q; 759 ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr); 760 ierr = CeedVectorCreate(op->ceed, 761 op->Erestrict->nelem * Q 762 * op->qf->qdatasize / sizeof(CeedScalar), 763 &impl->qdata); CeedChk(ierr); 764 } 765 *qdata = impl->qdata; 766 return 0; 767 } 768 769 static int CeedOperatorCreate_Magma(CeedOperator op) { 770 CeedOperator_Magma *impl; 771 int ierr; 772 773 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 774 op->data = impl; 775 op->Destroy = CeedOperatorDestroy_Magma; 776 op->Apply = CeedOperatorApply_Magma; 777 op->GetQData = CeedOperatorGetQData_Magma; 778 return 0; 779 } 780 781 // ***************************************************************************** 782 // * INIT 783 // ***************************************************************************** 784 static int CeedInit_Magma(const char *resource, Ceed ceed) { 785 if (strcmp(resource, "/cpu/magma") 786 && strcmp(resource, "/gpu/magma")) 787 return CeedError(ceed, 1, "MAGMA backend cannot use resource: %s", resource); 788 789 magma_init(); 790 //magma_print_environment(); 791 792 ceed->VecCreate = CeedVectorCreate_Magma; 793 ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Magma; 794 ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Magma; 795 ceed->QFunctionCreate = CeedQFunctionCreate_Magma; 796 ceed->OperatorCreate = CeedOperatorCreate_Magma; 797 return 0; 798 } 799 800 // ***************************************************************************** 801 // * REGISTER 802 // ***************************************************************************** 803 __attribute__((constructor)) 804 static void Register(void) { 805 CeedRegister("/cpu/magma", CeedInit_Magma); 806 CeedRegister("/gpu/magma", CeedInit_Magma); 807 } 808