1a4963045SJacob Faibussowitsch #pragma once 20e6b6b59SJacob Faibussowitsch 30e6b6b59SJacob Faibussowitsch #include <petscdevice.h> 40e6b6b59SJacob Faibussowitsch #include <petscpkg_version.h> 50e6b6b59SJacob Faibussowitsch 6ce78bad3SBarry Smith /* MANSEC = Sys */ 7ce78bad3SBarry Smith /* SUBMANSEC = Device */ 8ce78bad3SBarry Smith 90e6b6b59SJacob Faibussowitsch #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__)) 100e6b6b59SJacob Faibussowitsch #define PETSC_USING_HCC 1 110e6b6b59SJacob Faibussowitsch #endif 120e6b6b59SJacob Faibussowitsch 130e6b6b59SJacob Faibussowitsch #if PetscDefined(HAVE_HIP) 140e6b6b59SJacob Faibussowitsch #include <hip/hip_runtime.h> 150e6b6b59SJacob Faibussowitsch 160e6b6b59SJacob Faibussowitsch #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 17731341e5SJunchao Zhang 18731341e5SJunchao Zhang // cupmScalarPtrCast() returns hip{Float,Double}Complex while hipBLAS uses hipBlas{Float,Double}Complex, causing many VecCUPM errors like 19731341e5SJunchao Zhang // error: no matching function for call to 'cupmBlasXdot'. 20731341e5SJunchao Zhang // Before rocm-6.0, one can define ROCM_MATHLIBS_API_USE_HIP_COMPLEX to force rocm to 'typedef hipDoubleComplex hipBlasDoubleComplex' for example. 21731341e5SJunchao Zhang // Since then, ROCM_MATHLIBS_API_USE_HIP_COMPLEX is deprecated, and one can define HIPBLAS_V2 to use version 2 of hipBLAS that directly use hipDoubleComplex etc. 22731341e5SJunchao Zhang // Per AMD, HIPBLAS_V2 will be removed in the future so that hipBLAS only provides updated APIs (but not yet in 6.2.2 as of Sep. 27, 2024). 23731341e5SJunchao Zhang // 24731341e5SJunchao Zhang // see https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.0.0/functions.html#complex-datatypes 25731341e5SJunchao Zhang // and https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.2.2/functions.html#hipblas-v2-and-deprecations 26731341e5SJunchao Zhang #if PETSC_PKG_HIP_VERSION_GE(6, 0, 0) 27731341e5SJunchao Zhang #define HIPBLAS_V2 28731341e5SJunchao Zhang #else 29731341e5SJunchao Zhang #define ROCM_MATHLIBS_API_USE_HIP_COMPLEX 30731341e5SJunchao Zhang #endif 310e6b6b59SJacob Faibussowitsch #include <hipblas/hipblas.h> 3247d993e7Ssuyashtn #include <hipsparse/hipsparse.h> 330e6b6b59SJacob Faibussowitsch #else 340e6b6b59SJacob Faibussowitsch #include <hipblas.h> 3547d993e7Ssuyashtn #include <hipsparse.h> 360e6b6b59SJacob Faibussowitsch #endif 370e6b6b59SJacob Faibussowitsch 38c0d63f2fSJustin Chang #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0) 39c0d63f2fSJustin Chang #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN 40c0d63f2fSJustin Chang #endif 41c0d63f2fSJustin Chang 420e6b6b59SJacob Faibussowitsch #if defined(__HIP_PLATFORM_NVCC__) 430e6b6b59SJacob Faibussowitsch #include <cusolverDn.h> 440e6b6b59SJacob Faibussowitsch #else // __HIP_PLATFORM_HCC__ 450e6b6b59SJacob Faibussowitsch #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 4647d993e7Ssuyashtn #include <hipsolver/hipsolver.h> 470e6b6b59SJacob Faibussowitsch #else 4847d993e7Ssuyashtn #include <hipsolver.h> 490e6b6b59SJacob Faibussowitsch #endif 500e6b6b59SJacob Faibussowitsch #endif // __HIP_PLATFORM_NVCC__ 510e6b6b59SJacob Faibussowitsch #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex 520e6b6b59SJacob Faibussowitsch 530e6b6b59SJacob Faibussowitsch // REMOVE ME 540e6b6b59SJacob Faibussowitsch #define WaitForHIP() hipDeviceSynchronize() 550e6b6b59SJacob Faibussowitsch 5647d993e7Ssuyashtn /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */ 570e6b6b59SJacob Faibussowitsch PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */ 5847d993e7Ssuyashtn PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */ 5947d993e7Ssuyashtn PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */ 600e6b6b59SJacob Faibussowitsch 610e6b6b59SJacob Faibussowitsch #define PetscCallHIP(...) \ 620e6b6b59SJacob Faibussowitsch do { \ 630e6b6b59SJacob Faibussowitsch const hipError_t _p_hip_err__ = __VA_ARGS__; \ 640e6b6b59SJacob Faibussowitsch if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \ 650e6b6b59SJacob Faibussowitsch const char *name = hipGetErrorName(_p_hip_err__); \ 660e6b6b59SJacob Faibussowitsch const char *descr = hipGetErrorString(_p_hip_err__); \ 670e6b6b59SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \ 680e6b6b59SJacob Faibussowitsch } \ 690e6b6b59SJacob Faibussowitsch } while (0) 700e6b6b59SJacob Faibussowitsch #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__) 710e6b6b59SJacob Faibussowitsch 7247d993e7Ssuyashtn #define PetscHIPCheckLaunch \ 7347d993e7Ssuyashtn do { \ 7447d993e7Ssuyashtn /* Check synchronous errors, i.e. pre-launch */ \ 7547d993e7Ssuyashtn PetscCallHIP(hipGetLastError()); \ 7647d993e7Ssuyashtn /* Check asynchronous errors, i.e. kernel failed (ULF) */ \ 7747d993e7Ssuyashtn PetscCallHIP(hipDeviceSynchronize()); \ 7847d993e7Ssuyashtn } while (0) 7947d993e7Ssuyashtn 800e6b6b59SJacob Faibussowitsch #define PetscCallHIPBLAS(...) \ 810e6b6b59SJacob Faibussowitsch do { \ 820e6b6b59SJacob Faibussowitsch const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \ 830e6b6b59SJacob Faibussowitsch if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \ 840e6b6b59SJacob Faibussowitsch const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \ 850e6b6b59SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \ 860e6b6b59SJacob Faibussowitsch } \ 870e6b6b59SJacob Faibussowitsch } while (0) 880e6b6b59SJacob Faibussowitsch #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__) 890e6b6b59SJacob Faibussowitsch 9047d993e7Ssuyashtn #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0) 9147d993e7Ssuyashtn /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */ 9247d993e7Ssuyashtn #define PetscCallHIPSPARSE(...) \ 9347d993e7Ssuyashtn do { \ 9447d993e7Ssuyashtn const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \ 9547d993e7Ssuyashtn if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \ 9647d993e7Ssuyashtn const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \ 9747d993e7Ssuyashtn PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \ 9847d993e7Ssuyashtn SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \ 9947d993e7Ssuyashtn } \ 10047d993e7Ssuyashtn } while (0) 10147d993e7Ssuyashtn #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__) 10247d993e7Ssuyashtn 1030e6b6b59SJacob Faibussowitsch #define PetscCallHIPSOLVER(...) \ 1040e6b6b59SJacob Faibussowitsch do { \ 1050e6b6b59SJacob Faibussowitsch const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \ 10647d993e7Ssuyashtn if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \ 10747d993e7Ssuyashtn const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \ 10847d993e7Ssuyashtn if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \ 10947d993e7Ssuyashtn SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 11047d993e7Ssuyashtn "hipSolver error %d (%s). " \ 11147d993e7Ssuyashtn "This indicates the GPU may have run out resources", \ 11247d993e7Ssuyashtn (PetscErrorCode)_p_hipsolver_stat__, name); \ 11347d993e7Ssuyashtn } else { \ 11447d993e7Ssuyashtn SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \ 11547d993e7Ssuyashtn } \ 11647d993e7Ssuyashtn } \ 1170e6b6b59SJacob Faibussowitsch } while (0) 1180e6b6b59SJacob Faibussowitsch #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__) 1190e6b6b59SJacob Faibussowitsch 12047d993e7Ssuyashtn #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */ 1210e6b6b59SJacob Faibussowitsch /* hipSolver does not exist yet so we work around it 1220e6b6b59SJacob Faibussowitsch rocSOLVER users rocBLAS for the handle 1230e6b6b59SJacob Faibussowitsch * */ 1240e6b6b59SJacob Faibussowitsch #if defined(__HIP_PLATFORM_NVCC__) 12547d993e7Ssuyashtn #include <cusolverDn.h> 1260e6b6b59SJacob Faibussowitsch typedef cusolverDnHandle_t hipsolverHandle_t; 1270e6b6b59SJacob Faibussowitsch typedef cusolverStatus_t hipsolverStatus_t; 1280e6b6b59SJacob Faibussowitsch 1290e6b6b59SJacob Faibussowitsch /* Alias hipsolverDestroy to cusolverDnDestroy */ 130d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle) 131d71ae5a4SJacob Faibussowitsch { 1320e6b6b59SJacob Faibussowitsch return cusolverDnDestroy(hipsolverhandle); 1330e6b6b59SJacob Faibussowitsch } 1340e6b6b59SJacob Faibussowitsch 1350e6b6b59SJacob Faibussowitsch /* Alias hipsolverCreate to cusolverDnCreate */ 136d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 137d71ae5a4SJacob Faibussowitsch { 1380e6b6b59SJacob Faibussowitsch return cusolverDnCreate(hipsolverhandle); 1390e6b6b59SJacob Faibussowitsch } 1400e6b6b59SJacob Faibussowitsch 1410e6b6b59SJacob Faibussowitsch /* Alias hipsolverGetStream to cusolverDnGetStream */ 142d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 143d71ae5a4SJacob Faibussowitsch { 1440e6b6b59SJacob Faibussowitsch return cusolverDnGetStream(handle, stream); 1450e6b6b59SJacob Faibussowitsch } 1460e6b6b59SJacob Faibussowitsch 1470e6b6b59SJacob Faibussowitsch /* Alias hipsolverSetStream to cusolverDnSetStream */ 148d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 149d71ae5a4SJacob Faibussowitsch { 1500e6b6b59SJacob Faibussowitsch return cusolveDnSetStream(handle, stream); 1510e6b6b59SJacob Faibussowitsch } 1520e6b6b59SJacob Faibussowitsch #else /* __HIP_PLATFORM_HCC__ */ 15347d993e7Ssuyashtn #include <rocsolver.h> 15447d993e7Ssuyashtn #include <rocblas.h> 1550e6b6b59SJacob Faibussowitsch typedef rocblas_handle hipsolverHandle_t; 1560e6b6b59SJacob Faibussowitsch typedef rocblas_status hipsolverStatus_t; 1570e6b6b59SJacob Faibussowitsch 1580e6b6b59SJacob Faibussowitsch /* Alias hipsolverDestroy to rocblas_destroy_handle */ 159d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle) 160d71ae5a4SJacob Faibussowitsch { 1610e6b6b59SJacob Faibussowitsch return rocblas_destroy_handle(hipsolverhandle); 1620e6b6b59SJacob Faibussowitsch } 1630e6b6b59SJacob Faibussowitsch 1640e6b6b59SJacob Faibussowitsch /* Alias hipsolverCreate to rocblas_destroy_handle */ 165d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 166d71ae5a4SJacob Faibussowitsch { 1670e6b6b59SJacob Faibussowitsch return rocblas_create_handle(hipsolverhandle); 1680e6b6b59SJacob Faibussowitsch } 1690e6b6b59SJacob Faibussowitsch 1700e6b6b59SJacob Faibussowitsch // Alias hipsolverGetStream to rocblas_get_stream 171d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 172d71ae5a4SJacob Faibussowitsch { 1730e6b6b59SJacob Faibussowitsch return rocblas_get_stream(handle, stream); 1740e6b6b59SJacob Faibussowitsch } 1750e6b6b59SJacob Faibussowitsch 1760e6b6b59SJacob Faibussowitsch // Alias hipsolverSetStream to rocblas_set_stream 177d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 178d71ae5a4SJacob Faibussowitsch { 1790e6b6b59SJacob Faibussowitsch return rocblas_set_stream(handle, stream); 1800e6b6b59SJacob Faibussowitsch } 1810e6b6b59SJacob Faibussowitsch #endif // __HIP_PLATFORM_NVCC__ 18247d993e7Ssuyashtn #endif /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */ 1830e6b6b59SJacob Faibussowitsch // REMOVE ME 1840e6b6b59SJacob Faibussowitsch PETSC_EXTERN hipStream_t PetscDefaultHipStream; // The default stream used by PETSc 1850e6b6b59SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *); 1860e6b6b59SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *); 1875c127019SJunchao Zhang PETSC_EXTERN PetscErrorCode PetscGetCurrentHIPStream(hipStream_t *); 1880e6b6b59SJacob Faibussowitsch 1890e6b6b59SJacob Faibussowitsch #endif // PETSC_HAVE_HIP 1900e6b6b59SJacob Faibussowitsch 19115af11aaSJacob Faibussowitsch // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the 19215af11aaSJacob Faibussowitsch // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros 19315af11aaSJacob Faibussowitsch // would already be defined, but they would be empty since we cannot be using NVCC at the same 19415af11aaSJacob Faibussowitsch // time. 1950e6b6b59SJacob Faibussowitsch #if PetscDefined(USING_HCC) 19615af11aaSJacob Faibussowitsch #undef PETSC_HOST_DECL 19715af11aaSJacob Faibussowitsch #undef PETSC_DEVICE_DECL 19815af11aaSJacob Faibussowitsch #undef PETSC_KERNEL_DECL 19915af11aaSJacob Faibussowitsch #undef PETSC_SHAREDMEM_DECL 20015af11aaSJacob Faibussowitsch #undef PETSC_FORCEINLINE 20115af11aaSJacob Faibussowitsch #undef PETSC_CONSTMEM_DECL 20215af11aaSJacob Faibussowitsch 2030e6b6b59SJacob Faibussowitsch #define PETSC_HOST_DECL __host__ 2040e6b6b59SJacob Faibussowitsch #define PETSC_DEVICE_DECL __device__ 2050e6b6b59SJacob Faibussowitsch #define PETSC_KERNEL_DECL __global__ 2060e6b6b59SJacob Faibussowitsch #define PETSC_SHAREDMEM_DECL __shared__ 2070e6b6b59SJacob Faibussowitsch #define PETSC_FORCEINLINE __forceinline__ 2080e6b6b59SJacob Faibussowitsch #define PETSC_CONSTMEM_DECL __constant__ 20915af11aaSJacob Faibussowitsch #endif 21015af11aaSJacob Faibussowitsch 211*beceaeb6SBarry Smith #if !defined(PETSC_HOST_DECL) // use HOST_DECL as canary 2120e6b6b59SJacob Faibussowitsch #define PETSC_HOST_DECL 2130e6b6b59SJacob Faibussowitsch #define PETSC_DEVICE_DECL 2140e6b6b59SJacob Faibussowitsch #define PETSC_KERNEL_DECL 2150e6b6b59SJacob Faibussowitsch #define PETSC_SHAREDMEM_DECL 2160e6b6b59SJacob Faibussowitsch #define PETSC_FORCEINLINE inline 2170e6b6b59SJacob Faibussowitsch #define PETSC_CONSTMEM_DECL 21815af11aaSJacob Faibussowitsch #endif 2190e6b6b59SJacob Faibussowitsch 220*beceaeb6SBarry Smith #if !defined(PETSC_DEVICE_DEFINED_DECLS_PRIVATE) 22115af11aaSJacob Faibussowitsch #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE 2220e6b6b59SJacob Faibussowitsch #define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL 2230e6b6b59SJacob Faibussowitsch #define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE 2240e6b6b59SJacob Faibussowitsch #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE 22515af11aaSJacob Faibussowitsch #endif 226