153022affSStefano Zampini #include <petscmat.h> 253022affSStefano Zampini #include <h2opus.h> 353022affSStefano Zampini 453022affSStefano Zampini #ifndef __MATH2OPUS_HPP 553022affSStefano Zampini #define __MATH2OPUS_HPP 653022affSStefano Zampini 7*9371c9d4SSatish Balay class PetscMatrixSampler : public HMatrixSampler { 853022affSStefano Zampini protected: 953022affSStefano Zampini Mat A; 1053022affSStefano Zampini typedef typename VectorContainer<H2OPUS_HWTYPE_CPU, H2Opus_Real>::type HRealVector; 1153022affSStefano Zampini typedef typename VectorContainer<H2OPUS_HWTYPE_CPU, int>::type HIntVector; 1253022affSStefano Zampini HIntVector hindexmap; 1353022affSStefano Zampini HRealVector hbuffer_in, hbuffer_out; 1453022affSStefano Zampini #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 1553022affSStefano Zampini H2OpusDeviceVector<int> dindexmap; 1653022affSStefano Zampini H2OpusDeviceVector<H2Opus_Real> dbuffer_in, dbuffer_out; 1753022affSStefano Zampini #endif 1853022affSStefano Zampini bool gpusampling; 1953022affSStefano Zampini h2opusComputeStream_t stream; 2053022affSStefano Zampini 2153022affSStefano Zampini private: 2253022affSStefano Zampini void Init(); 2353022affSStefano Zampini void VerifyBuffers(int); 2453022affSStefano Zampini void PermuteBuffersIn(int, H2Opus_Real *, H2Opus_Real **, H2Opus_Real *, H2Opus_Real **); 2553022affSStefano Zampini void PermuteBuffersOut(int, H2Opus_Real *); 2653022affSStefano Zampini 2753022affSStefano Zampini public: 2853022affSStefano Zampini PetscMatrixSampler(); 2953022affSStefano Zampini PetscMatrixSampler(Mat); 3053022affSStefano Zampini ~PetscMatrixSampler(); 3153022affSStefano Zampini void SetSamplingMat(Mat); 3253022affSStefano Zampini void SetIndexMap(int, int *); 3353022affSStefano Zampini void SetGPUSampling(bool); 3453022affSStefano Zampini void SetStream(h2opusComputeStream_t); 3553022affSStefano Zampini virtual void sample(H2Opus_Real *, H2Opus_Real *, int); 36*9371c9d4SSatish Balay Mat GetSamplingMat() { 37*9371c9d4SSatish Balay return A; 38*9371c9d4SSatish Balay } 3953022affSStefano Zampini }; 4053022affSStefano Zampini 41*9371c9d4SSatish Balay void PetscMatrixSampler::Init() { 4253022affSStefano Zampini this->A = NULL; 4353022affSStefano Zampini this->gpusampling = false; 4453022affSStefano Zampini this->stream = NULL; 4553022affSStefano Zampini } 4653022affSStefano Zampini 47*9371c9d4SSatish Balay PetscMatrixSampler::PetscMatrixSampler() { 4853022affSStefano Zampini Init(); 4953022affSStefano Zampini } 5053022affSStefano Zampini 51*9371c9d4SSatish Balay PetscMatrixSampler::PetscMatrixSampler(Mat A) { 5253022affSStefano Zampini Init(); 5353022affSStefano Zampini SetSamplingMat(A); 5453022affSStefano Zampini } 5553022affSStefano Zampini 56*9371c9d4SSatish Balay void PetscMatrixSampler::SetSamplingMat(Mat A) { 57300d917bSStefano Zampini PetscMPIInt size = 1; 5853022affSStefano Zampini 599566063dSJacob Faibussowitsch if (A) PetscCallVoid(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 609566063dSJacob Faibussowitsch if (size > 1) PetscCallVoid(PETSC_ERR_SUP); 619566063dSJacob Faibussowitsch PetscCallVoid(PetscObjectReference((PetscObject)A)); 629566063dSJacob Faibussowitsch PetscCallVoid(MatDestroy(&this->A)); 6353022affSStefano Zampini this->A = A; 6453022affSStefano Zampini } 6553022affSStefano Zampini 66*9371c9d4SSatish Balay void PetscMatrixSampler::SetStream(h2opusComputeStream_t stream) { 6753022affSStefano Zampini this->stream = stream; 6853022affSStefano Zampini } 6953022affSStefano Zampini 70*9371c9d4SSatish Balay void PetscMatrixSampler::SetIndexMap(int n, int *indexmap) { 7153022affSStefano Zampini copyVector(this->hindexmap, indexmap, n, H2OPUS_HWTYPE_CPU); 7253022affSStefano Zampini #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 7353022affSStefano Zampini copyVector(this->dindexmap, indexmap, n, H2OPUS_HWTYPE_CPU); 7453022affSStefano Zampini #endif 7553022affSStefano Zampini } 7653022affSStefano Zampini 77*9371c9d4SSatish Balay void PetscMatrixSampler::VerifyBuffers(int nv) { 7853022affSStefano Zampini if (this->hindexmap.size()) { 7953022affSStefano Zampini size_t n = this->hindexmap.size(); 8053022affSStefano Zampini if (!this->gpusampling) { 81*9371c9d4SSatish Balay if (hbuffer_in.size() < (size_t)n * nv) hbuffer_in.resize(n * nv); 82*9371c9d4SSatish Balay if (hbuffer_out.size() < (size_t)n * nv) hbuffer_out.resize(n * nv); 8353022affSStefano Zampini } else { 8453022affSStefano Zampini #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 85*9371c9d4SSatish Balay if (dbuffer_in.size() < (size_t)n * nv) dbuffer_in.resize(n * nv); 86*9371c9d4SSatish Balay if (dbuffer_out.size() < (size_t)n * nv) dbuffer_out.resize(n * nv); 8753022affSStefano Zampini #endif 8853022affSStefano Zampini } 8953022affSStefano Zampini } 9053022affSStefano Zampini } 9153022affSStefano Zampini 92*9371c9d4SSatish Balay void PetscMatrixSampler::PermuteBuffersIn(int nv, H2Opus_Real *v, H2Opus_Real **w, H2Opus_Real *ov, H2Opus_Real **ow) { 9353022affSStefano Zampini *w = v; 9453022affSStefano Zampini *ow = ov; 9553022affSStefano Zampini VerifyBuffers(nv); 9653022affSStefano Zampini if (this->hindexmap.size()) { 9753022affSStefano Zampini size_t n = this->hindexmap.size(); 9853022affSStefano Zampini if (!this->gpusampling) { 99*9371c9d4SSatish Balay permute_vectors(v, this->hbuffer_in.data(), n, nv, this->hindexmap.data(), 1, H2OPUS_HWTYPE_CPU, this->stream); 10053022affSStefano Zampini *w = this->hbuffer_in.data(); 10153022affSStefano Zampini *ow = this->hbuffer_out.data(); 10253022affSStefano Zampini } else { 10353022affSStefano Zampini #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 104*9371c9d4SSatish Balay permute_vectors(v, this->dbuffer_in.data(), n, nv, this->dindexmap.data(), 1, H2OPUS_HWTYPE_GPU, this->stream); 10553022affSStefano Zampini *w = this->dbuffer_in.data(); 10653022affSStefano Zampini *ow = this->dbuffer_out.data(); 10753022affSStefano Zampini #endif 10853022affSStefano Zampini } 10953022affSStefano Zampini } 11053022affSStefano Zampini } 11153022affSStefano Zampini 112*9371c9d4SSatish Balay void PetscMatrixSampler::PermuteBuffersOut(int nv, H2Opus_Real *v) { 11353022affSStefano Zampini VerifyBuffers(nv); 11453022affSStefano Zampini if (this->hindexmap.size()) { 11553022affSStefano Zampini size_t n = this->hindexmap.size(); 11653022affSStefano Zampini if (!this->gpusampling) { 117*9371c9d4SSatish Balay permute_vectors(this->hbuffer_out.data(), v, n, nv, this->hindexmap.data(), 0, H2OPUS_HWTYPE_CPU, this->stream); 11853022affSStefano Zampini } else { 11953022affSStefano Zampini #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 120*9371c9d4SSatish Balay permute_vectors(this->dbuffer_out.data(), v, n, nv, this->dindexmap.data(), 0, H2OPUS_HWTYPE_GPU, this->stream); 12153022affSStefano Zampini #endif 12253022affSStefano Zampini } 12353022affSStefano Zampini } 12453022affSStefano Zampini } 12553022affSStefano Zampini 126*9371c9d4SSatish Balay void PetscMatrixSampler::SetGPUSampling(bool gpusampling) { 12753022affSStefano Zampini this->gpusampling = gpusampling; 12853022affSStefano Zampini } 12953022affSStefano Zampini 130*9371c9d4SSatish Balay PetscMatrixSampler::~PetscMatrixSampler() { 1319566063dSJacob Faibussowitsch PetscCallVoid(MatDestroy(&A)); 13253022affSStefano Zampini } 13353022affSStefano Zampini 134*9371c9d4SSatish Balay void PetscMatrixSampler::sample(H2Opus_Real *x, H2Opus_Real *y, int samples) { 13553022affSStefano Zampini MPI_Comm comm = PetscObjectComm((PetscObject)this->A); 13653022affSStefano Zampini Mat X = NULL, Y = NULL; 13753022affSStefano Zampini PetscInt M, N, m, n; 13853022affSStefano Zampini H2Opus_Real *px, *py; 13953022affSStefano Zampini 1409566063dSJacob Faibussowitsch if (!this->A) PetscCallVoid(PETSC_ERR_PLIB); 1419566063dSJacob Faibussowitsch PetscCallVoid(MatGetSize(this->A, &M, &N)); 1429566063dSJacob Faibussowitsch PetscCallVoid(MatGetLocalSize(this->A, &m, &n)); 1439566063dSJacob Faibussowitsch PetscCallVoid(PetscObjectGetComm((PetscObject)A, &comm)); 14453022affSStefano Zampini PermuteBuffersIn(samples, x, &px, y, &py); 14553022affSStefano Zampini if (!this->gpusampling) { 1469566063dSJacob Faibussowitsch PetscCallVoid(MatCreateDense(comm, n, PETSC_DECIDE, N, samples, px, &X)); 1479566063dSJacob Faibussowitsch PetscCallVoid(MatCreateDense(comm, m, PETSC_DECIDE, M, samples, py, &Y)); 14853022affSStefano Zampini } else { 14953022affSStefano Zampini #if defined(PETSC_HAVE_CUDA) 1509566063dSJacob Faibussowitsch PetscCallVoid(MatCreateDenseCUDA(comm, n, PETSC_DECIDE, N, samples, px, &X)); 1519566063dSJacob Faibussowitsch PetscCallVoid(MatCreateDenseCUDA(comm, m, PETSC_DECIDE, M, samples, py, &Y)); 15253022affSStefano Zampini #endif 15353022affSStefano Zampini } 1549566063dSJacob Faibussowitsch PetscCallVoid(PetscLogObjectParent((PetscObject)this->A, (PetscObject)X)); 1559566063dSJacob Faibussowitsch PetscCallVoid(PetscLogObjectParent((PetscObject)this->A, (PetscObject)Y)); 1569566063dSJacob Faibussowitsch PetscCallVoid(MatMatMult(this->A, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 15753022affSStefano Zampini #if defined(PETSC_HAVE_CUDA) 15853022affSStefano Zampini if (this->gpusampling) { 15953022affSStefano Zampini const PetscScalar *dummy; 1609566063dSJacob Faibussowitsch PetscCallVoid(MatDenseCUDAGetArrayRead(Y, &dummy)); 1619566063dSJacob Faibussowitsch PetscCallVoid(MatDenseCUDARestoreArrayRead(Y, &dummy)); 16253022affSStefano Zampini } 16353022affSStefano Zampini #endif 16453022affSStefano Zampini PermuteBuffersOut(samples, y); 1659566063dSJacob Faibussowitsch PetscCallVoid(MatDestroy(&X)); 1669566063dSJacob Faibussowitsch PetscCallVoid(MatDestroy(&Y)); 16753022affSStefano Zampini } 16853022affSStefano Zampini 16953022affSStefano Zampini #endif 170