12a01fb1fSRichard Tran Mills #include <petscsys.h> /*I "petscsys.h" I*/ 22a01fb1fSRichard Tran Mills #include <petsccublas.h> /* Needed to provide CHKERRCUDA() */ 32a01fb1fSRichard Tran Mills 42a01fb1fSRichard Tran Mills static PetscErrorCode PetscCudaHostMalloc(size_t a,PetscBool clear,int lineno,const char function[],const char filename[],void **result) 52a01fb1fSRichard Tran Mills { 62a01fb1fSRichard Tran Mills cudaError_t ierr; 72a01fb1fSRichard Tran Mills ierr = cudaMallocHost(result,a);CHKERRCUDA(ierr); 82a01fb1fSRichard Tran Mills return 0; 92a01fb1fSRichard Tran Mills } 102a01fb1fSRichard Tran Mills 112a01fb1fSRichard Tran Mills static PetscErrorCode PetscCudaHostFree(void *aa,int lineno,const char function[],const char filename[]) 122a01fb1fSRichard Tran Mills { 132a01fb1fSRichard Tran Mills cudaError_t ierr; 142a01fb1fSRichard Tran Mills ierr = cudaFreeHost(aa);CHKERRCUDA(ierr); 152a01fb1fSRichard Tran Mills return 0; 162a01fb1fSRichard Tran Mills } 172a01fb1fSRichard Tran Mills 182a01fb1fSRichard Tran Mills static PetscErrorCode PetscCudaHostRealloc(size_t a,int lineno,const char function[],const char filename[],void **result) 192a01fb1fSRichard Tran Mills { 202a01fb1fSRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"CUDA has no Realloc()"); 212a01fb1fSRichard Tran Mills return 0; 222a01fb1fSRichard Tran Mills } 232a01fb1fSRichard Tran Mills 242a01fb1fSRichard Tran Mills static PetscErrorCode (*PetscMallocOld)(size_t,PetscBool,int,const char[],const char[],void**); 25*42e673e7SRichard Tran Mills static PetscErrorCode (*PetscReallocOld)(size_t,int,const char[],const char[],void**); 262a01fb1fSRichard Tran Mills static PetscErrorCode (*PetscFreeOld)(void*,int,const char[],const char[]); 272a01fb1fSRichard Tran Mills 282a01fb1fSRichard Tran Mills /*@C 292a01fb1fSRichard Tran Mills PetscMallocSetCUDAHost - Set PetscMalloc to use CUDAHostMalloc 302a01fb1fSRichard Tran Mills Switch the current malloc and free routines to the CUDA malloc and free routines 312a01fb1fSRichard Tran Mills 322a01fb1fSRichard Tran Mills Not Collective 332a01fb1fSRichard Tran Mills 342a01fb1fSRichard Tran Mills Level: developer 352a01fb1fSRichard Tran Mills 362a01fb1fSRichard Tran Mills Notes: 372a01fb1fSRichard Tran Mills This provides a way to use the CUDA malloc and free routines temporarily. One 382a01fb1fSRichard Tran Mills can switch back to the previous choice by calling PetscMallocResetCUDAHost(). 392a01fb1fSRichard Tran Mills 402a01fb1fSRichard Tran Mills .seealso: PetscMallocResetCUDAHost() 412a01fb1fSRichard Tran Mills @*/ 422a01fb1fSRichard Tran Mills PetscErrorCode PetscMallocSetCUDAHost(void) 432a01fb1fSRichard Tran Mills { 442a01fb1fSRichard Tran Mills PetscFunctionBegin; 452a01fb1fSRichard Tran Mills /* Save the previous choice */ 462a01fb1fSRichard Tran Mills PetscMallocOld = PetscTrMalloc; 47*42e673e7SRichard Tran Mills PetscReallocOld = PetscTrRealloc; 482a01fb1fSRichard Tran Mills PetscFreeOld = PetscTrFree; 492a01fb1fSRichard Tran Mills PetscTrMalloc = PetscCudaHostMalloc; 50*42e673e7SRichard Tran Mills PetscTrRealloc = PetscCudaHostRealloc; 512a01fb1fSRichard Tran Mills PetscTrFree = PetscCudaHostFree; 522a01fb1fSRichard Tran Mills PetscFunctionReturn(0); 532a01fb1fSRichard Tran Mills } 542a01fb1fSRichard Tran Mills 552a01fb1fSRichard Tran Mills /*@C 562a01fb1fSRichard Tran Mills PetscMallocResetCUDAHost - Reset the changes made by PetscMallocSetCUDAHost 572a01fb1fSRichard Tran Mills 582a01fb1fSRichard Tran Mills Not Collective 592a01fb1fSRichard Tran Mills 602a01fb1fSRichard Tran Mills Level: developer 612a01fb1fSRichard Tran Mills 622a01fb1fSRichard Tran Mills .seealso: PetscMallocSetCUDAHost() 632a01fb1fSRichard Tran Mills @*/ 642a01fb1fSRichard Tran Mills PetscErrorCode PetscMallocResetCUDAHost(void) 652a01fb1fSRichard Tran Mills { 662a01fb1fSRichard Tran Mills PetscFunctionBegin; 672a01fb1fSRichard Tran Mills PetscTrMalloc = PetscMallocOld; 68*42e673e7SRichard Tran Mills PetscTrRealloc = PetscReallocOld; 692a01fb1fSRichard Tran Mills PetscTrFree = PetscFreeOld; 702a01fb1fSRichard Tran Mills PetscFunctionReturn(0); 712a01fb1fSRichard Tran Mills } 72