#include <petscsys.h>         /*I   "petscsys.h"   I*/
#include <petscdevice_cuda.h> /* Needed to provide PetscCallCUDA() */

/*
  PetscCUDAHostMalloc - allocation hook with the PetscTrMalloc signature, backed by cudaMallocHost()

  Input Parameters:
+ a     - number of bytes requested
- clear - if true, the returned buffer is zero-filled

  Output Parameter:
. result - the allocated buffer, or NULL when a is 0

  Note:
  The unnamed int / const char[] arguments are part of the hook signature and are not used here
  (presumably the caller's line, function and file -- confirm against the PetscTrMalloc declaration).
*/
static PetscErrorCode PetscCUDAHostMalloc(size_t a, PetscBool clear, int, const char[], const char[], void **result)
{
  /* A zero-byte request needs no CUDA call; hand back a well-defined NULL */
  if (!a) {
    *result = NULL;
    return 0;
  }
  PetscCallCUDA(cudaMallocHost(result, a));
  /* cudaMallocHost() does not initialise the buffer, so honour a request for zeroed memory explicitly */
  if (clear) PetscCall(PetscMemzero(*result, a));
  return 0;
}

/*
  PetscCUDAHostFree - free hook with the PetscTrFree signature, backed by cudaFreeHost()

  Input Parameter:
. aa - pointer previously obtained from PetscCUDAHostMalloc()

  Note:
  The unnamed trailing arguments are part of the hook signature and are not used here.
*/
static PetscErrorCode PetscCUDAHostFree(void *aa, int, const char[], const char[])
{
  PetscCallCUDA(cudaFreeHost(aa));
  return 0;
}

/*
  PetscCUDAHostRealloc - realloc hook with the PetscTrRealloc signature

  Always raises PETSC_ERR_MEM: no CUDA reallocation routine is used by this file.
*/
static PetscErrorCode PetscCUDAHostRealloc(size_t, int, const char[], const char[], void **)
{
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "CUDA has no Realloc()");
}

/* Routines that were installed before PetscMallocSetCUDAHost(); all NULL when nothing is saved */
static PetscErrorCode (*PetscMallocOld)(size_t, PetscBool, int, const char[], const char[], void **) = NULL;
static PetscErrorCode (*PetscReallocOld)(size_t, int, const char[], const char[], void **)           = NULL;
static PetscErrorCode (*PetscFreeOld)(void *, int, const char[], const char[])                       = NULL;

/*@C
  PetscMallocSetCUDAHost - Set `PetscMalloc()` to use `cudaMallocHost()`

  Switch the current malloc, realloc and free routines to the CUDA host malloc and free routines

  Not Collective

  Level: developer

  Notes:
  This provides a way to use the CUDA malloc and free routines temporarily. One
  can switch back to the previous choice by calling `PetscMallocResetCUDAHost()`.

  Calling this routine again while the CUDA routines are already installed does nothing, so the
  routines remembered for `PetscMallocResetCUDAHost()` are not overwritten.

  While the CUDA routines are installed every free goes through `cudaFreeHost()` and every
  reallocation request raises an error.

.seealso: `PetscMalloc()`, `PetscMallocResetCUDAHost()`, `PetscMallocSetHIPHost()`
@*/
PetscErrorCode PetscMallocSetCUDAHost(void)
{
  PetscFunctionBegin;
  /* Already installed: saving again would replace the remembered routines with the CUDA ones */
  if (PetscTrMalloc == PetscCUDAHostMalloc) PetscFunctionReturn(0);
  /* Save the previous choice */
  PetscMallocOld  = PetscTrMalloc;
  PetscReallocOld = PetscTrRealloc;
  PetscFreeOld    = PetscTrFree;
  /* Install the CUDA host routines */
  PetscTrMalloc  = PetscCUDAHostMalloc;
  PetscTrRealloc = PetscCUDAHostRealloc;
  PetscTrFree    = PetscCUDAHostFree;
  PetscFunctionReturn(0);
}

/*@C
  PetscMallocResetCUDAHost - Reset the changes made by `PetscMallocSetCUDAHost()`

  Not Collective

  Level: developer

  Note:
  Does nothing if there is no saved choice to restore, that is if `PetscMallocSetCUDAHost()` has not
  been called or its changes have already been reset.

.seealso: `PetscMalloc()`, `PetscMallocSetCUDAHost()`
@*/
PetscErrorCode PetscMallocResetCUDAHost(void)
{
  PetscFunctionBegin;
  /* Nothing saved: restoring would install NULL routines */
  if (!PetscMallocOld) PetscFunctionReturn(0);
  PetscTrMalloc  = PetscMallocOld;
  PetscTrRealloc = PetscReallocOld;
  PetscTrFree    = PetscFreeOld;
  /* Forget the saved routines so a repeated reset is harmless */
  PetscMallocOld  = NULL;
  PetscReallocOld = NULL;
  PetscFreeOld    = NULL;
  PetscFunctionReturn(0);
}