#include <petscsys.h>             /*I   "petscsys.h"   I*/
#include <petsccublas.h>  /* Needed to provide CHKERRCUDA() */

/*
   PetscCudaHostMalloc - PetscTrMalloc-compatible allocator that obtains memory from cudaMallocHost()

   Input Parameters:
+  a        - number of bytes requested
.  clear    - if PETSC_TRUE, the returned buffer is zero-filled
.  lineno   - line number of the call site (unused here)
.  function - name of the calling function (unused here)
-  filename - name of the calling file (unused here)

   Output Parameter:
.  result - the allocated buffer

   Notes:
   Deliberately does not use PetscFunctionBegin/PetscFunctionReturn(), like the original implementation.
*/
static PetscErrorCode PetscCudaHostMalloc(size_t a,PetscBool clear,int lineno,const char function[],const char filename[],void **result)
{
  cudaError_t cerr;

  cerr = cudaMallocHost(result,a);CHKERRCUDA(cerr);
  /* cudaMallocHost() is not documented to zero the buffer, so honor a "clear" request explicitly */
  if (clear && a && *result) {
    PetscErrorCode ierr;
    ierr = PetscMemzero(*result,a);CHKERRQ(ierr);
  }
  return 0;
}

/*
   PetscCudaHostFree - PetscTrFree-compatible routine that releases memory with cudaFreeHost()

   Input Parameters:
+  aa       - buffer to release
.  lineno   - line number of the call site (unused here)
.  function - name of the calling function (unused here)
-  filename - name of the calling file (unused here)
*/
static PetscErrorCode PetscCudaHostFree(void *aa,int lineno,const char function[],const char filename[])
{
  cudaError_t cerr;

  cerr = cudaFreeHost(aa);CHKERRCUDA(cerr);
  return 0;
}

/*
   PetscCudaHostRealloc - PetscTrRealloc-compatible routine; always fails with PETSC_ERR_MEM

   Notes:
   Installed alongside the malloc/free routines so that a realloc request made while the CUDA host
   allocator is active produces a clear error instead of being routed to the previous reallocator.
*/
static PetscErrorCode PetscCudaHostRealloc(size_t a,int lineno,const char function[],const char filename[],void **result)
{
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"CUDA has no Realloc()");
  return 0;
}

/* Routines that were active before PetscMallocSetCUDAHost(); all NULL while nothing is saved */
static PetscErrorCode (*PetscMallocOld)(size_t,PetscBool,int,const char[],const char[],void**);
static PetscErrorCode (*PetscFreeOld)(void*,int,const char[],const char[]);
static PetscErrorCode (*PetscReallocOld)(size_t,int,const char[],const char[],void**);

/*@C
   PetscMallocSetCUDAHost - Set PetscMalloc() to use cudaMallocHost()

   Switches the current malloc, free and realloc routines to the CUDA host versions

   Not Collective

   Level: developer

   Notes:
   This provides a way to use the CUDA malloc and free routines temporarily. One
   can switch back to the previous choice by calling PetscMallocResetCUDAHost().

   Reallocation is not supported while the CUDA routines are active; a realloc request
   generates an error.

   Calling this routine again while the CUDA routines are already active has no effect.

.seealso: PetscMallocResetCUDAHost()
@*/
PetscErrorCode PetscMallocSetCUDAHost(void)
{
  PetscFunctionBegin;
  /* Already active: saving again would replace the remembered routines with the CUDA ones */
  if (PetscTrMalloc == PetscCudaHostMalloc) PetscFunctionReturn(0);
  /* Save the previous choice */
  PetscMallocOld  = PetscTrMalloc;
  PetscFreeOld    = PetscTrFree;
  PetscReallocOld = PetscTrRealloc;
  PetscTrMalloc   = PetscCudaHostMalloc;
  PetscTrFree     = PetscCudaHostFree;
  PetscTrRealloc  = PetscCudaHostRealloc;
  PetscFunctionReturn(0);
}

/*@C
   PetscMallocResetCUDAHost - Reset the changes made by PetscMallocSetCUDAHost()

   Not Collective

   Level: developer

   Notes:
   Has no effect when there is no saved choice to restore, i.e. PetscMallocSetCUDAHost()
   has not been called or its changes have already been reset.

.seealso: PetscMallocSetCUDAHost()
@*/
PetscErrorCode PetscMallocResetCUDAHost(void)
{
  PetscFunctionBegin;
  /* Nothing saved: restoring would install NULL routines */
  if (!PetscMallocOld) PetscFunctionReturn(0);
  PetscTrMalloc   = PetscMallocOld;
  PetscTrFree     = PetscFreeOld;
  PetscTrRealloc  = PetscReallocOld;
  /* Forget the saved routines so that a repeated reset is a no-op */
  PetscMallocOld  = NULL;
  PetscFreeOld    = NULL;
  PetscReallocOld = NULL;
  PetscFunctionReturn(0);
}