1e133240eSMatthew G Knepley 2e133240eSMatthew G Knepley /* 3e133240eSMatthew G Knepley Provides an interface to the CUFFT package. 4c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 5e133240eSMatthew G Knepley */ 6e133240eSMatthew G Knepley 7030f984aSJacob Faibussowitsch #include <petscdevice.h> 8af0996ceSBarry Smith #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 9e133240eSMatthew G Knepley 10e133240eSMatthew G Knepley typedef struct { 11e133240eSMatthew G Knepley PetscInt ndim; 12e133240eSMatthew G Knepley PetscInt *dim; 13e133240eSMatthew G Knepley cufftHandle p_forward, p_backward; 14e133240eSMatthew G Knepley cufftComplex *devArray; 15e133240eSMatthew G Knepley } Mat_CUFFT; 16e133240eSMatthew G Knepley 17e133240eSMatthew G Knepley PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) 18e133240eSMatthew G Knepley { 19e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 20e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 21e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 22e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 23e133240eSMatthew G Knepley 24e133240eSMatthew G Knepley PetscFunctionBegin; 25*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(x, &x_array)); 26*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(y, &y_array)); 27e133240eSMatthew G Knepley if (!cufft->p_forward) { 28e133240eSMatthew G Knepley /* create a plan, then execute it */ 29e133240eSMatthew G Knepley switch (ndim) { 30e133240eSMatthew G Knepley case 1: 31*5f80ce2aSJacob Faibussowitsch CHKERRCUFFT(cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1)); 32e133240eSMatthew G Knepley break; 33e133240eSMatthew G Knepley case 2: 34*5f80ce2aSJacob Faibussowitsch CHKERRCUFFT(cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C)); 35e133240eSMatthew G Knepley break; 36e133240eSMatthew G Knepley case 3: 37*5f80ce2aSJacob Faibussowitsch CHKERRCUFFT(cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C)); 38e133240eSMatthew G Knepley break; 39e133240eSMatthew G Knepley default: 40*5f80ce2aSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim); 41e133240eSMatthew G Knepley } 42e133240eSMatthew G Knepley } 43e133240eSMatthew G Knepley /* transfer to GPU memory */ 44*5f80ce2aSJacob Faibussowitsch CHKERRCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice)); 45e133240eSMatthew G Knepley /* execute transform */ 46*5f80ce2aSJacob Faibussowitsch CHKERRCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD)); 47e133240eSMatthew G Knepley /* transfer from GPU memory */ 48*5f80ce2aSJacob Faibussowitsch CHKERRCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost)); 49*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(y, &y_array)); 50*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(x, &x_array)); 51e133240eSMatthew G Knepley PetscFunctionReturn(0); 52e133240eSMatthew G Knepley } 53e133240eSMatthew G Knepley 54e133240eSMatthew G Knepley PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) 55e133240eSMatthew G Knepley { 56e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 57e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 58e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 59e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 60e133240eSMatthew G Knepley 61e133240eSMatthew G Knepley PetscFunctionBegin; 62*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(x, &x_array)); 63*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(y, &y_array)); 64e133240eSMatthew G Knepley if (!cufft->p_backward) { 65e133240eSMatthew G Knepley /* create a plan, then execute it */ 66e133240eSMatthew G Knepley switch (ndim) { 67e133240eSMatthew G Knepley case 1: 68*5f80ce2aSJacob Faibussowitsch CHKERRCUFFT(cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1)); 69e133240eSMatthew G Knepley break; 70e133240eSMatthew G Knepley case 2: 71*5f80ce2aSJacob Faibussowitsch CHKERRCUFFT(cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C)); 72e133240eSMatthew G Knepley break; 73e133240eSMatthew G Knepley case 3: 74*5f80ce2aSJacob Faibussowitsch CHKERRCUFFT(cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C)); 75e133240eSMatthew G Knepley break; 76e133240eSMatthew G Knepley default: 77*5f80ce2aSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim); 78e133240eSMatthew G Knepley } 79e133240eSMatthew G Knepley } 80e133240eSMatthew G Knepley /* transfer to GPU memory */ 81*5f80ce2aSJacob Faibussowitsch CHKERRCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice)); 82e133240eSMatthew G Knepley /* execute transform */ 83*5f80ce2aSJacob Faibussowitsch CHKERRCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE)); 84e133240eSMatthew G Knepley /* transfer from GPU memory */ 85*5f80ce2aSJacob Faibussowitsch CHKERRCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost)); 86*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(y, &y_array)); 87*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(x, &x_array)); 88e133240eSMatthew G Knepley PetscFunctionReturn(0); 89e133240eSMatthew G Knepley } 90e133240eSMatthew G Knepley 91e133240eSMatthew G Knepley PetscErrorCode MatDestroy_SeqCUFFT(Mat A) 92e133240eSMatthew G Knepley { 93e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 94e133240eSMatthew G Knepley 95e133240eSMatthew G Knepley PetscFunctionBegin; 96*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(cufft->dim)); 97*5f80ce2aSJacob Faibussowitsch if (cufft->p_forward) CHKERRCUFFT(cufftDestroy(cufft->p_forward)); 98*5f80ce2aSJacob Faibussowitsch if (cufft->p_backward) CHKERRCUFFT(cufftDestroy(cufft->p_backward)); 99*5f80ce2aSJacob Faibussowitsch CHKERRCUDA(cudaFree(cufft->devArray)); 100*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(A->data)); 101*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectChangeTypeName((PetscObject)A,0)); 102e133240eSMatthew G Knepley PetscFunctionReturn(0); 103e133240eSMatthew G Knepley } 104e133240eSMatthew G Knepley 105e133240eSMatthew G Knepley /*@ 106e133240eSMatthew G Knepley MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT 107e133240eSMatthew G Knepley 108d083f849SBarry Smith Collective 109e133240eSMatthew G Knepley 110e133240eSMatthew G Knepley Input Parameters: 111e133240eSMatthew G Knepley + comm - MPI communicator, set to PETSC_COMM_SELF 112e133240eSMatthew G Knepley . ndim - the ndim-dimensional transform 113e133240eSMatthew G Knepley - dim - array of size ndim, dim[i] contains the vector length in the i-dimension 114e133240eSMatthew G Knepley 115e133240eSMatthew G Knepley Output Parameter: 116e133240eSMatthew G Knepley . A - the matrix 117e133240eSMatthew G Knepley 118e133240eSMatthew G Knepley Options Database Keys: 119e133240eSMatthew G Knepley . -mat_cufft_plannerflags - set CUFFT planner flags 120e133240eSMatthew G Knepley 121e133240eSMatthew G Knepley Level: intermediate 122e133240eSMatthew G Knepley @*/ 1237087cfbeSBarry Smith PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A) 124e133240eSMatthew G Knepley { 125e133240eSMatthew G Knepley Mat_CUFFT *cufft; 126*5f80ce2aSJacob Faibussowitsch PetscInt m = 1; 127e133240eSMatthew G Knepley 128e133240eSMatthew G Knepley PetscFunctionBegin; 129*5f80ce2aSJacob Faibussowitsch PetscCheck(ndim >= 0,PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %" PetscInt_FMT " must be > 0", ndim); 130*5f80ce2aSJacob Faibussowitsch if (ndim) PetscValidIntPointer(dim,3); 131*5f80ce2aSJacob Faibussowitsch PetscValidPointer(A,4); 132*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(comm, A)); 133*5f80ce2aSJacob Faibussowitsch for (PetscInt d = 0; d < ndim; ++d) { 134*5f80ce2aSJacob Faibussowitsch PetscCheck(dim[d] >= 0,PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%" PetscInt_FMT "]=%" PetscInt_FMT " must be > 0", d, dim[d]); 135e133240eSMatthew G Knepley m *= dim[d]; 136e133240eSMatthew G Knepley } 137*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(*A, m, m, m, m)); 138*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT)); 139e133240eSMatthew G Knepley 140*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscNewLog(*A,&cufft)); 141e133240eSMatthew G Knepley (*A)->data = (void*) cufft; 142*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(ndim+1, &cufft->dim)); 143*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(cufft->dim, dim, ndim)); 1442205254eSKarl Rupp 145e133240eSMatthew G Knepley cufft->ndim = ndim; 146e133240eSMatthew G Knepley cufft->p_forward = 0; 147e133240eSMatthew G Knepley cufft->p_backward = 0; 148e133240eSMatthew G Knepley cufft->dim[ndim] = m; 149e133240eSMatthew G Knepley 150e133240eSMatthew G Knepley /* GPU memory allocation */ 151*5f80ce2aSJacob Faibussowitsch CHKERRCUDA(cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m)); 152e133240eSMatthew G Knepley 153e133240eSMatthew G Knepley (*A)->ops->mult = MatMult_SeqCUFFT; 154e133240eSMatthew G Knepley (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT; 155e133240eSMatthew G Knepley (*A)->assembled = PETSC_TRUE; 156e133240eSMatthew G Knepley (*A)->ops->destroy = MatDestroy_SeqCUFFT; 157e133240eSMatthew G Knepley 158*5f80ce2aSJacob Faibussowitsch /* get runtime options ...what options????? */ 159*5f80ce2aSJacob Faibussowitsch { 160*5f80ce2aSJacob Faibussowitsch PetscErrorCode ierr; 161*5f80ce2aSJacob Faibussowitsch 162e133240eSMatthew G Knepley ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr); 163e133240eSMatthew G Knepley ierr = PetscOptionsEnd();CHKERRQ(ierr); 164*5f80ce2aSJacob Faibussowitsch } 165e133240eSMatthew G Knepley PetscFunctionReturn(0); 166e133240eSMatthew G Knepley } 167