12f21b5c6SHong Zhang #include <petscsys.h> /*I "petscsys.h" I*/ 22f21b5c6SHong Zhang 32f21b5c6SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 42f21b5c6SHong Zhang #include <hbwmalloc.h> 52f21b5c6SHong Zhang #endif 62f21b5c6SHong Zhang 72f21b5c6SHong Zhang /* 82f21b5c6SHong Zhang These are defined in mal.c and ensure that malloced space is PetscScalar aligned 92f21b5c6SHong Zhang */ 10*95c0884eSLisandro Dalcin PETSC_EXTERN PetscErrorCode PetscMallocAlign(size_t,int,const char[],const char[],void**); 11*95c0884eSLisandro Dalcin PETSC_EXTERN PetscErrorCode PetscFreeAlign(void*,int,const char[],const char[]); 12*95c0884eSLisandro Dalcin PETSC_EXTERN PetscErrorCode PetscReallocAlign(size_t,int,const char[],const char[],void**); 132f21b5c6SHong Zhang 142f21b5c6SHong Zhang /* 152f21b5c6SHong Zhang PetscHBWMalloc - HBW malloc. 162f21b5c6SHong Zhang 172f21b5c6SHong Zhang Input Parameters: 182f21b5c6SHong Zhang + a - number of bytes to allocate 192f21b5c6SHong Zhang . lineno - line number where used 202f21b5c6SHong Zhang . function - function calling routine 212f21b5c6SHong Zhang - filename - file name where used 222f21b5c6SHong Zhang 232f21b5c6SHong Zhang Returns: 242f21b5c6SHong Zhang double aligned pointer to requested storage, or null if not 252f21b5c6SHong Zhang available. 262f21b5c6SHong Zhang */ 2750a41461SHong Zhang static PetscErrorCode PetscHBWMalloc(size_t a,int lineno,const char function[],const char filename[],void **result) 282f21b5c6SHong Zhang { 292f21b5c6SHong Zhang #if !defined(PETSC_HAVE_MEMKIND) 302f21b5c6SHong Zhang return PetscMallocAlign(a,lineno,function,filename,result); 312f21b5c6SHong Zhang #else 322f21b5c6SHong Zhang if (!a) { *result = NULL; return 0; } 332f21b5c6SHong Zhang /* 342f21b5c6SHong Zhang The default policy is if insufficient memory is available from the high bandwidth memory 352f21b5c6SHong Zhang fall back to standard memory. If we use the HBW_POLICY_BIND policy, errno is set to ENOMEM 362f21b5c6SHong Zhang and the allocated pointer is set to NULL if there is not enough HWB memory available. 372f21b5c6SHong Zhang */ 382f21b5c6SHong Zhang { 392f21b5c6SHong Zhang int ierr = hbw_posix_memalign(result,PETSC_MEMALIGN,a); 402f21b5c6SHong Zhang if (ierr || !*result) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MEM,"HBW Memory requested %.0f",(PetscLogDouble)a); 412f21b5c6SHong Zhang } 422f21b5c6SHong Zhang return 0; 432f21b5c6SHong Zhang #endif 442f21b5c6SHong Zhang } 452f21b5c6SHong Zhang 460bf43a65SHong Zhang static PetscErrorCode PetscHBWFree(void *aa,int lineno,const char function[],const char filename[]) 472f21b5c6SHong Zhang { 482f21b5c6SHong Zhang #if !defined(PETSC_HAVE_MEMKIND) 490bf43a65SHong Zhang return PetscFreeAlign(aa,lineno,function,filename); 502f21b5c6SHong Zhang #else 512f21b5c6SHong Zhang hbw_free(aa); 522f21b5c6SHong Zhang return 0; 532f21b5c6SHong Zhang #endif 5413850c04SHong Zhang } 5513850c04SHong Zhang 560bf43a65SHong Zhang static PetscErrorCode PetscHBWRealloc(size_t a,int lineno,const char function[],const char filename[],void **result) 570bf43a65SHong Zhang { 580bf43a65SHong Zhang #if !defined(PETSC_HAVE_MEMKIND) 590bf43a65SHong Zhang return PetscReallocAlign(a,lineno,function,filename,result); 600bf43a65SHong Zhang #else 610bf43a65SHong Zhang if (!a) { 620bf43a65SHong Zhang int ierr = PetscFreeAlign(*result,lineno,function,filename); 630bf43a65SHong Zhang if (ierr) return ierr; 640bf43a65SHong Zhang *result = NULL; 650bf43a65SHong Zhang return 0; 660bf43a65SHong Zhang } 670bf43a65SHong Zhang *result = hbw_realloc(*result,a); 680bf43a65SHong Zhang if (!*result) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MEM,"Memory requested %.0f",(PetscLogDouble)a); 690bf43a65SHong Zhang return 0; 700bf43a65SHong Zhang #endif 710bf43a65SHong Zhang } 720bf43a65SHong Zhang 73*95c0884eSLisandro Dalcin PETSC_INTERN PetscErrorCode PetscSetUseHBWMalloc_Private(void) 7413850c04SHong Zhang { 7513850c04SHong Zhang PetscErrorCode ierr; 7613850c04SHong Zhang 7713850c04SHong Zhang PetscFunctionBegin; 7813850c04SHong Zhang ierr = PetscMallocSet(PetscHBWMalloc,PetscHBWFree);CHKERRQ(ierr); 790bf43a65SHong Zhang PetscTrRealloc = PetscHBWRealloc; 8013850c04SHong Zhang PetscFunctionReturn(0); 812f21b5c6SHong Zhang } 82