1e133240eSMatthew G Knepley 2e133240eSMatthew G Knepley /* 3e133240eSMatthew G Knepley Provides an interface to the CUFFT package. 4c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 5e133240eSMatthew G Knepley */ 6e133240eSMatthew G Knepley 7*030f984aSJacob Faibussowitsch #include <petscdevice.h> 8af0996ceSBarry Smith #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 9e133240eSMatthew G Knepley 10e133240eSMatthew G Knepley typedef struct { 11e133240eSMatthew G Knepley PetscInt ndim; 12e133240eSMatthew G Knepley PetscInt *dim; 13e133240eSMatthew G Knepley cufftHandle p_forward, p_backward; 14e133240eSMatthew G Knepley cufftComplex *devArray; 15e133240eSMatthew G Knepley } Mat_CUFFT; 16e133240eSMatthew G Knepley 17e133240eSMatthew G Knepley PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) 18e133240eSMatthew G Knepley { 19e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 20e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 21e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 22e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 23e133240eSMatthew G Knepley cufftResult result; 24*030f984aSJacob Faibussowitsch cudaError_t cerr; 25e133240eSMatthew G Knepley PetscErrorCode ierr; 26e133240eSMatthew G Knepley 27e133240eSMatthew G Knepley PetscFunctionBegin; 28e133240eSMatthew G Knepley ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 29e133240eSMatthew G Knepley ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 30e133240eSMatthew G Knepley if (!cufft->p_forward) { 31e133240eSMatthew G Knepley cufftResult result; 32e133240eSMatthew G Knepley /* create a plan, then execute it */ 33e133240eSMatthew G Knepley switch (ndim) { 34e133240eSMatthew G Knepley case 1: 35*030f984aSJacob Faibussowitsch result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRCUFFT(result); 36e133240eSMatthew G Knepley break; 37e133240eSMatthew G Knepley case 2: 38*030f984aSJacob Faibussowitsch result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRCUFFT(result); 39e133240eSMatthew G Knepley break; 40e133240eSMatthew G Knepley case 3: 41*030f984aSJacob Faibussowitsch result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRCUFFT(result); 42e133240eSMatthew G Knepley break; 43e133240eSMatthew G Knepley default: 44e133240eSMatthew G Knepley SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 45e133240eSMatthew G Knepley } 46e133240eSMatthew G Knepley } 47e133240eSMatthew G Knepley /* transfer to GPU memory */ 48*030f984aSJacob Faibussowitsch cerr = cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);CHKERRCUDA(cerr); 49e133240eSMatthew G Knepley /* execute transform */ 50*030f984aSJacob Faibussowitsch result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRCUFFT(result); 51e133240eSMatthew G Knepley /* transfer from GPU memory */ 52*030f984aSJacob Faibussowitsch cerr = cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);CHKERRCUDA(cerr); 53e133240eSMatthew G Knepley ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 54e133240eSMatthew G Knepley ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 55e133240eSMatthew G Knepley PetscFunctionReturn(0); 56e133240eSMatthew G Knepley } 57e133240eSMatthew G Knepley 58e133240eSMatthew G Knepley PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) 59e133240eSMatthew G Knepley { 60e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 61e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 62e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 63e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 64e133240eSMatthew G Knepley cufftResult result; 65*030f984aSJacob Faibussowitsch cudaError_t cerr; 66e133240eSMatthew G Knepley PetscErrorCode ierr; 67e133240eSMatthew G Knepley 68e133240eSMatthew G Knepley PetscFunctionBegin; 69e133240eSMatthew G Knepley ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 70e133240eSMatthew G Knepley ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 71e133240eSMatthew G Knepley if (!cufft->p_backward) { 72e133240eSMatthew G Knepley /* create a plan, then execute it */ 73e133240eSMatthew G Knepley switch (ndim) { 74e133240eSMatthew G Knepley case 1: 75*030f984aSJacob Faibussowitsch result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRCUFFT(result); 76e133240eSMatthew G Knepley break; 77e133240eSMatthew G Knepley case 2: 78*030f984aSJacob Faibussowitsch result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRCUFFT(result); 79e133240eSMatthew G Knepley break; 80e133240eSMatthew G Knepley case 3: 81*030f984aSJacob Faibussowitsch result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRCUFFT(result); 82e133240eSMatthew G Knepley break; 83e133240eSMatthew G Knepley default: 84e133240eSMatthew G Knepley SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 85e133240eSMatthew G Knepley } 86e133240eSMatthew G Knepley } 87e133240eSMatthew G Knepley /* transfer to GPU memory */ 88*030f984aSJacob Faibussowitsch cerr = cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);CHKERRCUDA(cerr); 89e133240eSMatthew G Knepley /* execute transform */ 90*030f984aSJacob Faibussowitsch result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRCUFFT(result); 91e133240eSMatthew G Knepley /* transfer from GPU memory */ 92*030f984aSJacob Faibussowitsch cerr = cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);CHKERRCUDA(cerr); 93e133240eSMatthew G Knepley ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 94e133240eSMatthew G Knepley ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 95e133240eSMatthew G Knepley PetscFunctionReturn(0); 96e133240eSMatthew G Knepley } 97e133240eSMatthew G Knepley 98e133240eSMatthew G Knepley PetscErrorCode MatDestroy_SeqCUFFT(Mat A) 99e133240eSMatthew G Knepley { 100e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 101e133240eSMatthew G Knepley cufftResult result; 102*030f984aSJacob Faibussowitsch cudaError_t cerr; 103e133240eSMatthew G Knepley PetscErrorCode ierr; 104e133240eSMatthew G Knepley 105e133240eSMatthew G Knepley PetscFunctionBegin; 106e133240eSMatthew G Knepley ierr = PetscFree(cufft->dim);CHKERRQ(ierr); 107*030f984aSJacob Faibussowitsch if (cufft->p_forward) {result = cufftDestroy(cufft->p_forward);CHKERRCUFFT(result);} 108*030f984aSJacob Faibussowitsch if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRCUFFT(result);} 109*030f984aSJacob Faibussowitsch cerr = cudaFree(cufft->devArray);CHKERRCUDA(cerr); 110bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 111bf0cc555SLisandro Dalcin ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 112e133240eSMatthew G Knepley PetscFunctionReturn(0); 113e133240eSMatthew G Knepley } 114e133240eSMatthew G Knepley 115e133240eSMatthew G Knepley /*@ 116e133240eSMatthew G Knepley MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT 117e133240eSMatthew G Knepley 118d083f849SBarry Smith Collective 119e133240eSMatthew G Knepley 120e133240eSMatthew G Knepley Input Parameters: 121e133240eSMatthew G Knepley + comm - MPI communicator, set to PETSC_COMM_SELF 122e133240eSMatthew G Knepley . ndim - the ndim-dimensional transform 123e133240eSMatthew G Knepley - dim - array of size ndim, dim[i] contains the vector length in the i-dimension 124e133240eSMatthew G Knepley 125e133240eSMatthew G Knepley Output Parameter: 126e133240eSMatthew G Knepley . A - the matrix 127e133240eSMatthew G Knepley 128e133240eSMatthew G Knepley Options Database Keys: 129e133240eSMatthew G Knepley . -mat_cufft_plannerflags - set CUFFT planner flags 130e133240eSMatthew G Knepley 131e133240eSMatthew G Knepley Level: intermediate 132e133240eSMatthew G Knepley @*/ 1337087cfbeSBarry Smith PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A) 134e133240eSMatthew G Knepley { 135e133240eSMatthew G Knepley Mat_CUFFT *cufft; 136e133240eSMatthew G Knepley PetscInt m, d; 137e133240eSMatthew G Knepley PetscErrorCode ierr; 138*030f984aSJacob Faibussowitsch cudaError_t cerr; 139e133240eSMatthew G Knepley 140e133240eSMatthew G Knepley PetscFunctionBegin; 141e133240eSMatthew G Knepley if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim); 142e133240eSMatthew G Knepley ierr = MatCreate(comm, A);CHKERRQ(ierr); 143e133240eSMatthew G Knepley m = 1; 144e133240eSMatthew G Knepley for (d = 0; d < ndim; ++d) { 145e133240eSMatthew G Knepley if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]); 146e133240eSMatthew G Knepley m *= dim[d]; 147e133240eSMatthew G Knepley } 148e133240eSMatthew G Knepley ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr); 149e133240eSMatthew G Knepley ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr); 150e133240eSMatthew G Knepley 151b00a9115SJed Brown ierr = PetscNewLog(*A,&cufft);CHKERRQ(ierr); 152e133240eSMatthew G Knepley (*A)->data = (void*) cufft; 153854ce69bSBarry Smith ierr = PetscMalloc1(ndim+1, &cufft->dim);CHKERRQ(ierr); 154580bdb30SBarry Smith ierr = PetscArraycpy(cufft->dim, dim, ndim);CHKERRQ(ierr); 1552205254eSKarl Rupp 156e133240eSMatthew G Knepley cufft->ndim = ndim; 157e133240eSMatthew G Knepley cufft->p_forward = 0; 158e133240eSMatthew G Knepley cufft->p_backward = 0; 159e133240eSMatthew G Knepley cufft->dim[ndim] = m; 160e133240eSMatthew G Knepley 161e133240eSMatthew G Knepley /* GPU memory allocation */ 162*030f984aSJacob Faibussowitsch cerr = cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m);CHKERRCUDA(cerr); 163e133240eSMatthew G Knepley 164e133240eSMatthew G Knepley (*A)->ops->mult = MatMult_SeqCUFFT; 165e133240eSMatthew G Knepley (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT; 166e133240eSMatthew G Knepley (*A)->assembled = PETSC_TRUE; 167e133240eSMatthew G Knepley (*A)->ops->destroy = MatDestroy_SeqCUFFT; 168e133240eSMatthew G Knepley 169e133240eSMatthew G Knepley /* get runtime options */ 170e133240eSMatthew G Knepley ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr); 171e133240eSMatthew G Knepley ierr = PetscOptionsEnd();CHKERRQ(ierr); 172e133240eSMatthew G Knepley PetscFunctionReturn(0); 173e133240eSMatthew G Knepley } 174