1*47d993e7Ssuyashtn /* Portions of this code are under: 2*47d993e7Ssuyashtn Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 3*47d993e7Ssuyashtn */ 4*47d993e7Ssuyashtn #if !defined(__MPIHIPSPARSEMATIMPL) 5*47d993e7Ssuyashtn #define __MPIHIPSPARSEMATIMPL 6*47d993e7Ssuyashtn 7*47d993e7Ssuyashtn #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 8*47d993e7Ssuyashtn #include <hipsparse/hipsparse.h> 9*47d993e7Ssuyashtn #else 10*47d993e7Ssuyashtn #include <hipsparse.h> 11*47d993e7Ssuyashtn #endif 12*47d993e7Ssuyashtn #include <petsc/private/hipvecimpl.h> 13*47d993e7Ssuyashtn 14*47d993e7Ssuyashtn struct Mat_MPIAIJHIPSPARSE { 15*47d993e7Ssuyashtn /* The following are used by GPU capabilities to store matrix storage formats on the device */ 16*47d993e7Ssuyashtn MatHIPSPARSEStorageFormat diagGPUMatFormat; 17*47d993e7Ssuyashtn MatHIPSPARSEStorageFormat offdiagGPUMatFormat; 18*47d993e7Ssuyashtn PetscSplitCSRDataStructure deviceMat; 19*47d993e7Ssuyashtn PetscInt coo_nd, coo_no; /* number of nonzero entries in coo for the diag/offdiag part */ 20*47d993e7Ssuyashtn THRUSTINTARRAY *coo_p; /* the permutation array that partitions the coo array into diag/offdiag parts */ 21*47d993e7Ssuyashtn THRUSTARRAY *coo_pw; /* the work array that stores the partitioned coo scalar values */ 22*47d993e7Ssuyashtn 23*47d993e7Ssuyashtn /* Extended COO stuff */ 24*47d993e7Ssuyashtn PetscCount *Ajmap1_d, *Aperm1_d; /* Local entries to diag */ 25*47d993e7Ssuyashtn PetscCount *Bjmap1_d, *Bperm1_d; /* Local entries to offdiag */ 26*47d993e7Ssuyashtn PetscCount *Aimap2_d, *Ajmap2_d, *Aperm2_d; /* Remote entries to diag */ 27*47d993e7Ssuyashtn PetscCount *Bimap2_d, *Bjmap2_d, *Bperm2_d; /* Remote entries to offdiag */ 28*47d993e7Ssuyashtn PetscCount *Cperm1_d; /* Permutation to fill send buffer. 'C' for communication */ 29*47d993e7Ssuyashtn PetscScalar *sendbuf_d, *recvbuf_d; /* Buffers for remote values in MatSetValuesCOO() */ 30*47d993e7Ssuyashtn PetscBool use_extended_coo; 31*47d993e7Ssuyashtn 32*47d993e7Ssuyashtn Mat_MPIAIJHIPSPARSE() 33*47d993e7Ssuyashtn { 34*47d993e7Ssuyashtn diagGPUMatFormat = MAT_HIPSPARSE_CSR; 35*47d993e7Ssuyashtn offdiagGPUMatFormat = MAT_HIPSPARSE_CSR; 36*47d993e7Ssuyashtn coo_p = NULL; 37*47d993e7Ssuyashtn coo_pw = NULL; 38*47d993e7Ssuyashtn deviceMat = NULL; 39*47d993e7Ssuyashtn use_extended_coo = PETSC_FALSE; 40*47d993e7Ssuyashtn } 41*47d993e7Ssuyashtn }; 42*47d993e7Ssuyashtn 43*47d993e7Ssuyashtn #endif 44