1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 90cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 10c6db04a5SJed Brown #include <fftw3-mpi.h> 110cf2e031SBarry Smith #else 120cf2e031SBarry Smith #include <fftw3.h> 130cf2e031SBarry Smith #endif 14b2b77a04SHong Zhang EXTERN_C_END 15b2b77a04SHong Zhang 16b2b77a04SHong Zhang typedef struct { 17b9ae062cSHong Zhang ptrdiff_t ndim_fftw, *dim_fftw; 18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 19a1b6d50cSHong Zhang fftw_iodim64 *iodims; 20a1b6d50cSHong Zhang #else 218c1d5ab3SHong Zhang fftw_iodim *iodims; 22a1b6d50cSHong Zhang #endif 23e5d7f247SAmlan Barua PetscInt partial_dim; 24b2b77a04SHong Zhang fftw_plan p_forward, p_backward; 25b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 26da81f932SPierre Jolivet PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw plan should be 27b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 28b2b77a04SHong Zhang } Mat_FFTW; 29b2b77a04SHong Zhang 30b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec); 31b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec); 320cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat); 330cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *); 340cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 35b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec); 36b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec); 37b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 380cf2e031SBarry Smith #endif 39b2b77a04SHong Zhang 40d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y) 41d71ae5a4SJacob Faibussowitsch { 42b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 43b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 44f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 45f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 461acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 47a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 48a1b6d50cSHong Zhang fftw_iodim64 *iodims; 49a1b6d50cSHong Zhang #else 508c1d5ab3SHong Zhang fftw_iodim *iodims; 51a1b6d50cSHong Zhang #endif 521acd80e4SHong Zhang PetscInt i; 531acd80e4SHong Zhang #endif 541acd80e4SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim; 55b2b77a04SHong Zhang 56b2b77a04SHong Zhang PetscFunctionBegin; 579566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 589566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 596aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */ 60b2b77a04SHong Zhang switch (ndim) { 61b2b77a04SHong Zhang case 1: 6258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 63b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 6458a912c5SAmlan Barua #else 6558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 6658a912c5SAmlan Barua #endif 67b2b77a04SHong Zhang break; 68b2b77a04SHong Zhang case 2: 6958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 70b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 7158a912c5SAmlan Barua #else 7258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 7358a912c5SAmlan Barua #endif 74b2b77a04SHong Zhang break; 75b2b77a04SHong Zhang case 3: 7658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 77b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 7858a912c5SAmlan Barua #else 7958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 8058a912c5SAmlan Barua #endif 81b2b77a04SHong Zhang break; 82b2b77a04SHong Zhang default: 8358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 84a1b6d50cSHong Zhang iodims = fftw->iodims; 85a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 868c1d5ab3SHong Zhang if (ndim) { 87a1b6d50cSHong Zhang iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1]; 888c1d5ab3SHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 898c1d5ab3SHong Zhang for (i = ndim - 2; i >= 0; --i) { 90a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 918c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 928c1d5ab3SHong Zhang } 938c1d5ab3SHong Zhang } 94a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 95a1b6d50cSHong Zhang #else 96a1b6d50cSHong Zhang if (ndim) { 97a1b6d50cSHong Zhang iodims[ndim - 1].n = (int)dim[ndim - 1]; 98a1b6d50cSHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 99a1b6d50cSHong Zhang for (i = ndim - 2; i >= 0; --i) { 100a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 101a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 102a1b6d50cSHong Zhang } 103a1b6d50cSHong Zhang } 104a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 105a1b6d50cSHong Zhang #endif 106a1b6d50cSHong Zhang 10758a912c5SAmlan Barua #else 108a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 10958a912c5SAmlan Barua #endif 110b2b77a04SHong Zhang break; 111b2b77a04SHong Zhang } 112f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array; 113b2b77a04SHong Zhang fftw->foutarray = y_array; 114b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 115b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 116b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 117b2b77a04SHong Zhang } else { /* use existing plan */ 118b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 1197e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 120b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 1217e4bc134SDominic Meiser #else 1227e4bc134SDominic Meiser fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array); 1237e4bc134SDominic Meiser #endif 124b2b77a04SHong Zhang } else { 125b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 126b2b77a04SHong Zhang } 127b2b77a04SHong Zhang } 1289566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 1299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 1303ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 131b2b77a04SHong Zhang } 132b2b77a04SHong Zhang 133d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y) 134d71ae5a4SJacob Faibussowitsch { 135b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 136b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 137f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 138f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 139b2b77a04SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim; 1401acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 141a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 142a1b6d50cSHong Zhang fftw_iodim64 *iodims = fftw->iodims; 143a1b6d50cSHong Zhang #else 144a1b6d50cSHong Zhang fftw_iodim *iodims = fftw->iodims; 145a1b6d50cSHong Zhang #endif 1461acd80e4SHong Zhang #endif 147b2b77a04SHong Zhang 148b2b77a04SHong Zhang PetscFunctionBegin; 1499566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 1509566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 1516aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */ 152b2b77a04SHong Zhang switch (ndim) { 153b2b77a04SHong Zhang case 1: 15458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 155b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 15658a912c5SAmlan Barua #else 157e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 15858a912c5SAmlan Barua #endif 159b2b77a04SHong Zhang break; 160b2b77a04SHong Zhang case 2: 16158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 162b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 16358a912c5SAmlan Barua #else 164e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 16558a912c5SAmlan Barua #endif 166b2b77a04SHong Zhang break; 167b2b77a04SHong Zhang case 3: 16858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 169b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 17058a912c5SAmlan Barua #else 171e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 17258a912c5SAmlan Barua #endif 173b2b77a04SHong Zhang break; 174b2b77a04SHong Zhang default: 17558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 176a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 177a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 178a1b6d50cSHong Zhang #else 1798c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 180a1b6d50cSHong Zhang #endif 18158a912c5SAmlan Barua #else 182a31b9140SHong Zhang fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 18358a912c5SAmlan Barua #endif 184b2b77a04SHong Zhang break; 185b2b77a04SHong Zhang } 186f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array; 187b2b77a04SHong Zhang fftw->boutarray = y_array; 1882f613bf5SBarry Smith fftw_execute(fftw->p_backward); 189b2b77a04SHong Zhang } else { /* use existing plan */ 190b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 1917e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 192b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 1937e4bc134SDominic Meiser #else 1947e4bc134SDominic Meiser fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array); 1957e4bc134SDominic Meiser #endif 196b2b77a04SHong Zhang } else { 1972f613bf5SBarry Smith fftw_execute(fftw->p_backward); 198b2b77a04SHong Zhang } 199b2b77a04SHong Zhang } 2009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 2019566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 2023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 203b2b77a04SHong Zhang } 204b2b77a04SHong Zhang 2050cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 206d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y) 207d71ae5a4SJacob Faibussowitsch { 208b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 209b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 210f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 211f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 212c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim; 213ce94432eSBarry Smith MPI_Comm comm; 214b2b77a04SHong Zhang 215b2b77a04SHong Zhang PetscFunctionBegin; 2169566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2179566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 2189566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 2196aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */ 220b2b77a04SHong Zhang switch (ndim) { 221b2b77a04SHong Zhang case 1: 2223c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 223b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 224ae0a50aaSHong Zhang #else 2254f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 2263c3a9c75SAmlan Barua #endif 227b2b77a04SHong Zhang break; 228b2b77a04SHong Zhang case 2: 22997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 230b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2313c3a9c75SAmlan Barua #else 2323c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2333c3a9c75SAmlan Barua #endif 234b2b77a04SHong Zhang break; 235b2b77a04SHong Zhang case 3: 2363c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 237b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2383c3a9c75SAmlan Barua #else 23951d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2403c3a9c75SAmlan Barua #endif 241b2b77a04SHong Zhang break; 242b2b77a04SHong Zhang default: 2433c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 244c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2453c3a9c75SAmlan Barua #else 2463c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2473c3a9c75SAmlan Barua #endif 248b2b77a04SHong Zhang break; 249b2b77a04SHong Zhang } 250f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array; 251b2b77a04SHong Zhang fftw->foutarray = y_array; 252b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 253b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 254b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 255b2b77a04SHong Zhang } else { /* use existing plan */ 256b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 257b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 258b2b77a04SHong Zhang } else { 259b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 260b2b77a04SHong Zhang } 261b2b77a04SHong Zhang } 2629566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 2639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 2643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 265b2b77a04SHong Zhang } 266b2b77a04SHong Zhang 267d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y) 268d71ae5a4SJacob Faibussowitsch { 269b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 270b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 271f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 272f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 273c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim; 274ce94432eSBarry Smith MPI_Comm comm; 275b2b77a04SHong Zhang 276b2b77a04SHong Zhang PetscFunctionBegin; 2779566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2789566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 2799566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 2806aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */ 281b2b77a04SHong Zhang switch (ndim) { 282b2b77a04SHong Zhang case 1: 2833c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 284b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 285ae0a50aaSHong Zhang #else 2864f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 2873c3a9c75SAmlan Barua #endif 288b2b77a04SHong Zhang break; 289b2b77a04SHong Zhang case 2: 29097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 291b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 2923c3a9c75SAmlan Barua #else 2933c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 2943c3a9c75SAmlan Barua #endif 295b2b77a04SHong Zhang break; 296b2b77a04SHong Zhang case 3: 2973c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 298b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 2993c3a9c75SAmlan Barua #else 3003c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3013c3a9c75SAmlan Barua #endif 302b2b77a04SHong Zhang break; 303b2b77a04SHong Zhang default: 3043c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 305c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3063c3a9c75SAmlan Barua #else 3073c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3083c3a9c75SAmlan Barua #endif 309b2b77a04SHong Zhang break; 310b2b77a04SHong Zhang } 311f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array; 312b2b77a04SHong Zhang fftw->boutarray = y_array; 3132f613bf5SBarry Smith fftw_execute(fftw->p_backward); 314b2b77a04SHong Zhang } else { /* use existing plan */ 315b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 316b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 317b2b77a04SHong Zhang } else { 3182f613bf5SBarry Smith fftw_execute(fftw->p_backward); 319b2b77a04SHong Zhang } 320b2b77a04SHong Zhang } 3219566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 3229566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 3233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 324b2b77a04SHong Zhang } 3250cf2e031SBarry Smith #endif 326b2b77a04SHong Zhang 327d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_FFTW(Mat A) 328d71ae5a4SJacob Faibussowitsch { 329b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 330b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 331b2b77a04SHong Zhang 332b2b77a04SHong Zhang PetscFunctionBegin; 333b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 334b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 335ad540459SPierre Jolivet if (fftw->iodims) free(fftw->iodims); 3369566063dSJacob Faibussowitsch PetscCall(PetscFree(fftw->dim_fftw)); 3379566063dSJacob Faibussowitsch PetscCall(PetscFree(fft->data)); 3382e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL)); 3392e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL)); 3402e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL)); 3410cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 3426ccf0b3eSHong Zhang fftw_mpi_cleanup(); 3430cf2e031SBarry Smith #endif 3443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 345b2b77a04SHong Zhang } 346b2b77a04SHong Zhang 3470cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 348c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 349d71ae5a4SJacob Faibussowitsch PetscErrorCode VecDestroy_MPIFFTW(Vec v) 350d71ae5a4SJacob Faibussowitsch { 351b2b77a04SHong Zhang PetscScalar *array; 352b2b77a04SHong Zhang 353b2b77a04SHong Zhang PetscFunctionBegin; 3549566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array)); 3552f613bf5SBarry Smith fftw_free((fftw_complex *)array); 3569566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array)); 3579566063dSJacob Faibussowitsch PetscCall(VecDestroy_MPI(v)); 3583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 359b2b77a04SHong Zhang } 3600cf2e031SBarry Smith #endif 361b2b77a04SHong Zhang 3620cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 363d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) 364d71ae5a4SJacob Faibussowitsch { 3655b113e21Ss-sajid-ali Mat A; 3665b113e21Ss-sajid-ali 3675b113e21Ss-sajid-ali PetscFunctionBegin; 3689566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A)); 3699566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL)); 3703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3715b113e21Ss-sajid-ali } 3725b113e21Ss-sajid-ali 373d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) 374d71ae5a4SJacob Faibussowitsch { 3755b113e21Ss-sajid-ali Mat A; 3765b113e21Ss-sajid-ali 3775b113e21Ss-sajid-ali PetscFunctionBegin; 3789566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A)); 3799566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL)); 3803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3815b113e21Ss-sajid-ali } 3825b113e21Ss-sajid-ali 383d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 384d71ae5a4SJacob Faibussowitsch { 3855b113e21Ss-sajid-ali Mat A; 3865b113e21Ss-sajid-ali 3875b113e21Ss-sajid-ali PetscFunctionBegin; 3889566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A)); 3899566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new)); 3903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3915b113e21Ss-sajid-ali } 3920cf2e031SBarry Smith #endif 3935b113e21Ss-sajid-ali 3944be45526SHong Zhang /*@ 3952a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 39611a5261eSBarry Smith parallel layout determined by `MATFFTW` 3974f7415efSAmlan Barua 398c3339decSBarry Smith Collective 3994f7415efSAmlan Barua 4004f7415efSAmlan Barua Input Parameter: 4013564f093SHong Zhang . A - the matrix 4024f7415efSAmlan Barua 403d8d19677SJose E. Roman Output Parameters: 404cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 4056b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW 406cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4074f7415efSAmlan Barua 4082ef1f0ffSBarry Smith Options Database Key: 4092ef1f0ffSBarry Smith . -mat_fftw_plannerflags - set FFTW planner flags 4102ef1f0ffSBarry Smith 4114f7415efSAmlan Barua Level: advanced 4123564f093SHong Zhang 41311a5261eSBarry Smith Notes: 41411a5261eSBarry Smith The parallel layout of output of forward FFTW is always same as the input 41597504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 41697504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 41711a5261eSBarry Smith 41811a5261eSBarry Smith We need to provide enough space while doing parallel real transform. 41997504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 42097504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 421e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 422e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 423e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 42411a5261eSBarry Smith 425e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 426e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 427e0ec536eSAmlan Barua each processor and returns that. 4284f7415efSAmlan Barua 42911a5261eSBarry Smith Developer Note: 43011a5261eSBarry Smith How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically? 43111a5261eSBarry Smith 432*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()` 4334be45526SHong Zhang @*/ 434d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) 435d71ae5a4SJacob Faibussowitsch { 4364be45526SHong Zhang PetscFunctionBegin; 437cac4c232SBarry Smith PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z)); 4383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4394be45526SHong Zhang } 4404be45526SHong Zhang 441d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) 442d71ae5a4SJacob Faibussowitsch { 4434f7415efSAmlan Barua PetscMPIInt size, rank; 444ce94432eSBarry Smith MPI_Comm comm; 4454f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 4464f7415efSAmlan Barua 4474f7415efSAmlan Barua PetscFunctionBegin; 4484f7415efSAmlan Barua PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4494f7415efSAmlan Barua PetscValidType(A, 1); 4509566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 4514f7415efSAmlan Barua 4529566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 4539566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4544f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4554f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4569566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin)); 4579566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout)); 4589566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout)); 4594f7415efSAmlan Barua #else 4609566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin)); 4619566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout)); 4629566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout)); 4634f7415efSAmlan Barua #endif 4640cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 46597504071SAmlan Barua } else { /* parallel cases */ 4660cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 4670cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 4684f7415efSAmlan Barua ptrdiff_t alloc_local, local_n0, local_0_start; 4699496c9aeSAmlan Barua ptrdiff_t local_n1; 4709496c9aeSAmlan Barua fftw_complex *data_fout; 4719496c9aeSAmlan Barua ptrdiff_t local_1_start; 4729496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4739496c9aeSAmlan Barua fftw_complex *data_fin, *data_bout; 4749496c9aeSAmlan Barua #else 4754f7415efSAmlan Barua double *data_finr, *data_boutr; 4766ccf0b3eSHong Zhang PetscInt n1, N1; 4779496c9aeSAmlan Barua ptrdiff_t temp; 4789496c9aeSAmlan Barua #endif 4799496c9aeSAmlan Barua 4804f7415efSAmlan Barua switch (ndim) { 4814f7415efSAmlan Barua case 1: 48257625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4834f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform"); 48457625b84SAmlan Barua #else 48557625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 48657625b84SAmlan Barua if (fin) { 48757625b84SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 4889566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin)); 4899566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 4905b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 49157625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 49257625b84SAmlan Barua } 49357625b84SAmlan Barua if (fout) { 49457625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 4959566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout)); 4969566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 4975b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 49857625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 49957625b84SAmlan Barua } 50057625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 50157625b84SAmlan Barua if (bout) { 50257625b84SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5039566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout)); 5049566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5055b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 50657625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 50757625b84SAmlan Barua } 50857625b84SAmlan Barua break; 50957625b84SAmlan Barua #endif 5104f7415efSAmlan Barua case 2: 51197504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 5124f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 5139371c9d4SSatish Balay N1 = 2 * dim[0] * (dim[1] / 2 + 1); 5149371c9d4SSatish Balay n1 = 2 * local_n0 * (dim[1] / 2 + 1); 5154f7415efSAmlan Barua if (fin) { 5164f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5179566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 5189566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5195b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5204f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5214f7415efSAmlan Barua } 52257625b84SAmlan Barua if (fout) { 52357625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5249566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout)); 5259566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5265b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 52757625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 52857625b84SAmlan Barua } 5295b113e21Ss-sajid-ali if (bout) { 5305b113e21Ss-sajid-ali data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5319566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 5329566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5335b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5345b113e21Ss-sajid-ali (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5355b113e21Ss-sajid-ali } 5364f7415efSAmlan Barua #else 5374f7415efSAmlan Barua /* Get local size */ 5384f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 5394f7415efSAmlan Barua if (fin) { 5404f7415efSAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5419566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 5429566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5435b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5444f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5454f7415efSAmlan Barua } 5464f7415efSAmlan Barua if (fout) { 5474f7415efSAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5489566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 5499566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5505b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 5514f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5524f7415efSAmlan Barua } 5534f7415efSAmlan Barua if (bout) { 5544f7415efSAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5559566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 5569566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5575b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5584f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5594f7415efSAmlan Barua } 5604f7415efSAmlan Barua #endif 5614f7415efSAmlan Barua break; 5624f7415efSAmlan Barua case 3: 5634f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5644f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 5659371c9d4SSatish Balay N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1); 5669371c9d4SSatish Balay n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1); 5674f7415efSAmlan Barua if (fin) { 5684f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5699566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 5709566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5715b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5724f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5734f7415efSAmlan Barua } 5745b113e21Ss-sajid-ali if (fout) { 5755b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5769566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout)); 5779566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5785b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 5795b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5805b113e21Ss-sajid-ali } 5814f7415efSAmlan Barua if (bout) { 5824f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5839566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 5849566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5855b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5864f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5874f7415efSAmlan Barua } 5884f7415efSAmlan Barua #else 5890c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 5900c9b18e4SAmlan Barua if (fin) { 5910c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5929566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 5939566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5945b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5950c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5960c9b18e4SAmlan Barua } 5970c9b18e4SAmlan Barua if (fout) { 5980c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5999566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 6009566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6015b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6020c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6030c9b18e4SAmlan Barua } 6040c9b18e4SAmlan Barua if (bout) { 6050c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6069566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 6079566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6085b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6090c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6100c9b18e4SAmlan Barua } 6114f7415efSAmlan Barua #endif 6124f7415efSAmlan Barua break; 6134f7415efSAmlan Barua default: 6144f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6154f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 6164f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 6174f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 6180cf2e031SBarry Smith N1 = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp); 6194f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 6204f7415efSAmlan Barua 6214f7415efSAmlan Barua if (fin) { 6224f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6239566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin)); 6249566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6255b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6264f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6274f7415efSAmlan Barua } 6285b113e21Ss-sajid-ali if (fout) { 6295b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6309566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout)); 6319566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6325b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6335b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6345b113e21Ss-sajid-ali } 6354f7415efSAmlan Barua if (bout) { 6364f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6379566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout)); 6389566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6395b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6409496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6414f7415efSAmlan Barua } 6424f7415efSAmlan Barua #else 6430c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 6440c9b18e4SAmlan Barua if (fin) { 6450c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6469566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 6479566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6485b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6490c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6500c9b18e4SAmlan Barua } 6510c9b18e4SAmlan Barua if (fout) { 6520c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6539566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 6549566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6555b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6560c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6570c9b18e4SAmlan Barua } 6580c9b18e4SAmlan Barua if (bout) { 6590c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6609566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 6619566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6625b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6630c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6640c9b18e4SAmlan Barua } 6654f7415efSAmlan Barua #endif 6664f7415efSAmlan Barua break; 6674f7415efSAmlan Barua } 668f9d91177SJunchao Zhang /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 669f9d91177SJunchao Zhang v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 670da81f932SPierre Jolivet memory leaks. We void these pointers here to avoid accident uses. 671f9d91177SJunchao Zhang */ 672f9d91177SJunchao Zhang if (fin) (*fin)->ops->replacearray = NULL; 673f9d91177SJunchao Zhang if (fout) (*fout)->ops->replacearray = NULL; 674f9d91177SJunchao Zhang if (bout) (*bout)->ops->replacearray = NULL; 6750cf2e031SBarry Smith #endif 6764f7415efSAmlan Barua } 6773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6784f7415efSAmlan Barua } 6794f7415efSAmlan Barua 6803564f093SHong Zhang /*@ 68111a5261eSBarry Smith VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls. 68254efbe56SHong Zhang 683c3339decSBarry Smith Collective 6843564f093SHong Zhang 6853564f093SHong Zhang Input Parameters: 6863564f093SHong Zhang + A - FFTW matrix 6873564f093SHong Zhang - x - the PETSc vector 6883564f093SHong Zhang 6892fe279fdSBarry Smith Output Parameter: 6903564f093SHong Zhang . y - the FFTW vector 6913564f093SHong Zhang 692b2b77a04SHong Zhang Level: intermediate 6933564f093SHong Zhang 69411a5261eSBarry Smith Note: 69511a5261eSBarry Smith For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 69697504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 69797504071SAmlan Barua zeros. This routine does that job by scattering operation. 69897504071SAmlan Barua 699*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()` 7003564f093SHong Zhang @*/ 701d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y) 702d71ae5a4SJacob Faibussowitsch { 7033564f093SHong Zhang PetscFunctionBegin; 704cac4c232SBarry Smith PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y)); 7053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7063564f093SHong Zhang } 7073c3a9c75SAmlan Barua 708d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y) 709d71ae5a4SJacob Faibussowitsch { 710ce94432eSBarry Smith MPI_Comm comm; 7113c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 7129496c9aeSAmlan Barua PetscInt low; 7137a21806cSHong Zhang PetscMPIInt rank, size; 7147a21806cSHong Zhang PetscInt vsize, vsize1; 7153c3a9c75SAmlan Barua VecScatter vecscat; 7160cf2e031SBarry Smith IS list1; 7173c3a9c75SAmlan Barua 7183564f093SHong Zhang PetscFunctionBegin; 7199566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 7209566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 7219566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7229566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(y, &low, NULL)); 7233c3a9c75SAmlan Barua 7243564f093SHong Zhang if (size == 1) { 7259566063dSJacob Faibussowitsch PetscCall(VecGetSize(x, &vsize)); 7269566063dSJacob Faibussowitsch PetscCall(VecGetSize(y, &vsize1)); 7279566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1)); 7289566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 7299566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7309566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7319566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7329566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7330cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 7343564f093SHong Zhang } else { 7350cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 7360cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 7370cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start; 7380cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start; 7390cf2e031SBarry Smith IS list2; 7400cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 7410cf2e031SBarry Smith PetscInt i, j, k, partial_dim; 7420cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 7430cf2e031SBarry Smith PetscInt NM; 7440cf2e031SBarry Smith ptrdiff_t temp; 7450cf2e031SBarry Smith #endif 7460cf2e031SBarry Smith 7473c3a9c75SAmlan Barua switch (ndim) { 7483c3a9c75SAmlan Barua case 1: 74964657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7507a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 75126fbe8dcSKarl Rupp 7529566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1)); 7539566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2)); 7549566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 7559566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7569566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7579566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7589566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7599566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 76064657d84SAmlan Barua #else 761e5d7f247SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 76264657d84SAmlan Barua #endif 7633c3a9c75SAmlan Barua break; 7643c3a9c75SAmlan Barua case 2: 765bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7667a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 76726fbe8dcSKarl Rupp 7689566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 7699566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 7709566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 7719566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7729566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7739566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7749566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7759566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 776bd59e6c6SAmlan Barua #else 777c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 77826fbe8dcSKarl Rupp 7799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 7809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 7813c3a9c75SAmlan Barua 7823564f093SHong Zhang if (dim[1] % 2 == 0) { 7833c3a9c75SAmlan Barua NM = dim[1] + 2; 7843564f093SHong Zhang } else { 7853c3a9c75SAmlan Barua NM = dim[1] + 1; 7863564f093SHong Zhang } 7873c3a9c75SAmlan Barua for (i = 0; i < local_n0; i++) { 7883c3a9c75SAmlan Barua for (j = 0; j < dim[1]; j++) { 7893c3a9c75SAmlan Barua tempindx = i * dim[1] + j; 7903c3a9c75SAmlan Barua tempindx1 = i * NM + j; 79126fbe8dcSKarl Rupp 7925b3e41ffSAmlan Barua indx1[tempindx] = local_0_start * dim[1] + tempindx; 7933c3a9c75SAmlan Barua indx2[tempindx] = low + tempindx1; 7943c3a9c75SAmlan Barua } 7953c3a9c75SAmlan Barua } 7963c3a9c75SAmlan Barua 7979566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 7989566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 7993c3a9c75SAmlan Barua 8009566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8019566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8029566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8039566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8049566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8059566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8069566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8079566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 808bd59e6c6SAmlan Barua #endif 8099496c9aeSAmlan Barua break; 8103c3a9c75SAmlan Barua 8113c3a9c75SAmlan Barua case 3: 812bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8137a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 81426fbe8dcSKarl Rupp 8159566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 8169566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 8179566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8189566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8199566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8209566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8219566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8229566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 823bd59e6c6SAmlan Barua #else 824c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 825f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform"); 8267a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 82726fbe8dcSKarl Rupp 8289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 8299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 83051d1eed7SAmlan Barua 831db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2; 832db4deed7SKarl Rupp else NM = dim[2] + 1; 83351d1eed7SAmlan Barua 83451d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) { 83551d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) { 83651d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) { 83751d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 83851d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k; 83926fbe8dcSKarl Rupp 84051d1eed7SAmlan Barua indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 84151d1eed7SAmlan Barua indx2[tempindx] = low + tempindx1; 84251d1eed7SAmlan Barua } 84351d1eed7SAmlan Barua } 84451d1eed7SAmlan Barua } 84551d1eed7SAmlan Barua 8469566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 8479566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 8489566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8499566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8509566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8519566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8529566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8539566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8549566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8559566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 856bd59e6c6SAmlan Barua #endif 8579496c9aeSAmlan Barua break; 8583c3a9c75SAmlan Barua 8593c3a9c75SAmlan Barua default: 860bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8617a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 86226fbe8dcSKarl Rupp 8639566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 8649566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 8659566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8669566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8679566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8689566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8699566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8709566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 871bd59e6c6SAmlan Barua #else 872c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 873f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform"); 874e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 87526fbe8dcSKarl Rupp 876e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 87726fbe8dcSKarl Rupp 8787a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 87926fbe8dcSKarl Rupp 880e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 881e5d7f247SAmlan Barua 882e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 883e5d7f247SAmlan Barua 8849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 8859566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 886e5d7f247SAmlan Barua 887db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2; 888db4deed7SKarl Rupp else NM = 1; 889e5d7f247SAmlan Barua 8906971673cSAmlan Barua j = low; 8913564f093SHong Zhang for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 8926971673cSAmlan Barua indx1[i] = local_0_start * partial_dim + i; 8936971673cSAmlan Barua indx2[i] = j; 89426fbe8dcSKarl Rupp if (k % dim[ndim - 1] == 0) j += NM; 8956971673cSAmlan Barua j++; 8966971673cSAmlan Barua } 8979566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 8989566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 8999566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 9009566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9019566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9029566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9039566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 9049566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 9059566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 9069566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 907bd59e6c6SAmlan Barua #endif 9089496c9aeSAmlan Barua break; 9093c3a9c75SAmlan Barua } 9100cf2e031SBarry Smith #endif 911e81bb599SAmlan Barua } 9123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9133c3a9c75SAmlan Barua } 9143c3a9c75SAmlan Barua 9153564f093SHong Zhang /*@ 91611a5261eSBarry Smith VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector. 9173564f093SHong Zhang 918c3339decSBarry Smith Collective 9193564f093SHong Zhang 9203564f093SHong Zhang Input Parameters: 92111a5261eSBarry Smith + A - `MATFFTW` matrix 9223564f093SHong Zhang - x - FFTW vector 9233564f093SHong Zhang 9242fe279fdSBarry Smith Output Parameter: 9253564f093SHong Zhang . y - PETSc vector 9263564f093SHong Zhang 9273564f093SHong Zhang Level: intermediate 9283564f093SHong Zhang 92911a5261eSBarry Smith Note: 93011a5261eSBarry Smith While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 93111a5261eSBarry Smith `VecScatterFFTWToPetsc()` removes those extra zeros. 9323564f093SHong Zhang 933*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()` 9343564f093SHong Zhang @*/ 935d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y) 936d71ae5a4SJacob Faibussowitsch { 9373c3a9c75SAmlan Barua PetscFunctionBegin; 938cac4c232SBarry Smith PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y)); 9393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9403c3a9c75SAmlan Barua } 94154efbe56SHong Zhang 942d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y) 943d71ae5a4SJacob Faibussowitsch { 944ce94432eSBarry Smith MPI_Comm comm; 9455b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 9469496c9aeSAmlan Barua PetscInt low; 9477a21806cSHong Zhang PetscMPIInt rank, size; 9485b3e41ffSAmlan Barua VecScatter vecscat; 9490cf2e031SBarry Smith IS list1; 9509496c9aeSAmlan Barua 9513564f093SHong Zhang PetscFunctionBegin; 9529566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9539566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 9549566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 9559566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x, &low, NULL)); 9565b3e41ffSAmlan Barua 957e81bb599SAmlan Barua if (size == 1) { 9589566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1)); 9599566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 9609566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9619566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9629566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9639566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 964e81bb599SAmlan Barua 9650cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 9663564f093SHong Zhang } else { 9670cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 9680cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 9690cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start; 9700cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start; 9710cf2e031SBarry Smith IS list2; 9720cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 9730cf2e031SBarry Smith PetscInt i, j, k, partial_dim; 9740cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 9750cf2e031SBarry Smith PetscInt NM; 9760cf2e031SBarry Smith ptrdiff_t temp; 9770cf2e031SBarry Smith #endif 978e81bb599SAmlan Barua switch (ndim) { 979e81bb599SAmlan Barua case 1: 98064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9817a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 98226fbe8dcSKarl Rupp 9839566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1)); 9849566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2)); 9859566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 9869566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9889566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9899566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 9909566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 99164657d84SAmlan Barua #else 9926a506ed8SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT"); 99364657d84SAmlan Barua #endif 9945b3e41ffSAmlan Barua break; 9955b3e41ffSAmlan Barua case 2: 996bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9977a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 99826fbe8dcSKarl Rupp 9999566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 10009566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 10019566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10029566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10039566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10049566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10059566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10069566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1007bd59e6c6SAmlan Barua #else 10087a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 100926fbe8dcSKarl Rupp 10109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 10119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 10125b3e41ffSAmlan Barua 1013db4deed7SKarl Rupp if (dim[1] % 2 == 0) NM = dim[1] + 2; 1014db4deed7SKarl Rupp else NM = dim[1] + 1; 10155b3e41ffSAmlan Barua 10165b3e41ffSAmlan Barua for (i = 0; i < local_n0; i++) { 10175b3e41ffSAmlan Barua for (j = 0; j < dim[1]; j++) { 10185b3e41ffSAmlan Barua tempindx = i * dim[1] + j; 10195b3e41ffSAmlan Barua tempindx1 = i * NM + j; 102026fbe8dcSKarl Rupp 10215b3e41ffSAmlan Barua indx1[tempindx] = local_0_start * dim[1] + tempindx; 10225b3e41ffSAmlan Barua indx2[tempindx] = low + tempindx1; 10235b3e41ffSAmlan Barua } 10245b3e41ffSAmlan Barua } 10255b3e41ffSAmlan Barua 10269566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 10279566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 10285b3e41ffSAmlan Barua 10299566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10309566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10319566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10329566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10339566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10349566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 10359566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 10369566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1037bd59e6c6SAmlan Barua #endif 10389496c9aeSAmlan Barua break; 10395b3e41ffSAmlan Barua case 3: 1040bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10417a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 104226fbe8dcSKarl Rupp 10439566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 10449566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 10459566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 10469566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10479566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10489566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10499566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10509566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1051bd59e6c6SAmlan Barua #else 10527a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 105326fbe8dcSKarl Rupp 10549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 10559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 105651d1eed7SAmlan Barua 1057db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2; 1058db4deed7SKarl Rupp else NM = dim[2] + 1; 105951d1eed7SAmlan Barua 106051d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) { 106151d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) { 106251d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) { 106351d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 106451d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k; 106526fbe8dcSKarl Rupp 106651d1eed7SAmlan Barua indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 106751d1eed7SAmlan Barua indx2[tempindx] = low + tempindx1; 106851d1eed7SAmlan Barua } 106951d1eed7SAmlan Barua } 107051d1eed7SAmlan Barua } 107151d1eed7SAmlan Barua 10729566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 10739566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 107451d1eed7SAmlan Barua 10759566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10769566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10779566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10789566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10799566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10809566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 10819566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 10829566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1083bd59e6c6SAmlan Barua #endif 10849496c9aeSAmlan Barua break; 10855b3e41ffSAmlan Barua default: 1086bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10877a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 108826fbe8dcSKarl Rupp 10899566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 10909566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 10919566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 10929566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10939566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10949566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10959566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10969566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1097bd59e6c6SAmlan Barua #else 1098ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 109926fbe8dcSKarl Rupp 1100ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 110126fbe8dcSKarl Rupp 1102c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 110326fbe8dcSKarl Rupp 1104ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 1105ba6e06d0SAmlan Barua 1106ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1107ba6e06d0SAmlan Barua 11089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 11099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 1110ba6e06d0SAmlan Barua 1111db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2; 1112db4deed7SKarl Rupp else NM = 1; 1113ba6e06d0SAmlan Barua 1114ba6e06d0SAmlan Barua j = low; 11153564f093SHong Zhang for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 1116ba6e06d0SAmlan Barua indx1[i] = local_0_start * partial_dim + i; 1117ba6e06d0SAmlan Barua indx2[i] = j; 11183564f093SHong Zhang if (k % dim[ndim - 1] == 0) j += NM; 1119ba6e06d0SAmlan Barua j++; 1120ba6e06d0SAmlan Barua } 11219566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 11229566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 1123ba6e06d0SAmlan Barua 11249566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 11259566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11269566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11279566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11289566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11299566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 11309566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 11319566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1132bd59e6c6SAmlan Barua #endif 11339496c9aeSAmlan Barua break; 11345b3e41ffSAmlan Barua } 11350cf2e031SBarry Smith #endif 1136e81bb599SAmlan Barua } 11373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11385b3e41ffSAmlan Barua } 11395b3e41ffSAmlan Barua 1140350f1385SJose E. Roman /*MC 1141350f1385SJose E. Roman MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package. 1142350f1385SJose E. Roman 1143350f1385SJose E. Roman Level: intermediate 1144350f1385SJose E. Roman 1145*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()` 1146350f1385SJose E. Roman M*/ 1147d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1148d71ae5a4SJacob Faibussowitsch { 1149ce94432eSBarry Smith MPI_Comm comm; 1150b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 1151b2b77a04SHong Zhang Mat_FFTW *fftw; 11520cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 11535274ed1bSHong Zhang const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"}; 11545274ed1bSHong Zhang unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE}; 1155b2b77a04SHong Zhang PetscBool flg; 11564f48bc67SAmlan Barua PetscInt p_flag, partial_dim = 1, ctr; 115711902ff2SHong Zhang PetscMPIInt size, rank; 11589496c9aeSAmlan Barua ptrdiff_t *pdim; 11599496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11600cf2e031SBarry Smith PetscInt tot_dim; 11619496c9aeSAmlan Barua #endif 11629496c9aeSAmlan Barua 1163b2b77a04SHong Zhang PetscFunctionBegin; 11649566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 11659566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 11669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1167c92418ccSAmlan Barua 11680cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 11691878d478SAmlan Barua fftw_mpi_init(); 11700cf2e031SBarry Smith #endif 117111902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t)); 117211902ff2SHong Zhang pdim[0] = dim[0]; 11738ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11748ca4c034SAmlan Barua tot_dim = 2 * dim[0]; 11758ca4c034SAmlan Barua #endif 11763564f093SHong Zhang for (ctr = 1; ctr < ndim; ctr++) { 11776882372aSHong Zhang partial_dim *= dim[ctr]; 117811902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11798ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1180db4deed7SKarl Rupp if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1); 1181db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 11828ca4c034SAmlan Barua #endif 11836882372aSHong Zhang } 11843c3a9c75SAmlan Barua 1185b2b77a04SHong Zhang if (size == 1) { 1186e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 11879566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N)); 11880cf2e031SBarry Smith fft->n = fft->N; 1189e81bb599SAmlan Barua #else 11909566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim)); 11910cf2e031SBarry Smith fft->n = tot_dim; 11920cf2e031SBarry Smith #endif 11930cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 11940cf2e031SBarry Smith } else { 11950cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start, local_n1, local_1_start; 11960cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 11970cf2e031SBarry Smith ptrdiff_t temp; 11980cf2e031SBarry Smith PetscInt N1; 1199e81bb599SAmlan Barua #endif 1200e81bb599SAmlan Barua 1201b2b77a04SHong Zhang switch (ndim) { 1202b2b77a04SHong Zhang case 1: 12033c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12043c3a9c75SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 1205e5d7f247SAmlan Barua #else 12067a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 12070cf2e031SBarry Smith fft->n = (PetscInt)local_n0; 12089566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N)); 1209e5d7f247SAmlan Barua #endif 1210b2b77a04SHong Zhang break; 1211b2b77a04SHong Zhang case 2: 12125b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 12137a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 12140cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1]; 12159566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 12165b3e41ffSAmlan Barua #else 1217c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 121826fbe8dcSKarl Rupp 12190cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1); 12209566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1))); 12215b3e41ffSAmlan Barua #endif 1222b2b77a04SHong Zhang break; 1223b2b77a04SHong Zhang case 3: 122451d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12257a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 122626fbe8dcSKarl Rupp 12270cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1] * dim[2]; 12289566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 122951d1eed7SAmlan Barua #else 1230c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 123126fbe8dcSKarl Rupp 12320cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1); 12339566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1))); 123451d1eed7SAmlan Barua #endif 1235b2b77a04SHong Zhang break; 1236b2b77a04SHong Zhang default: 1237b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12387a21806cSHong Zhang fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start); 123926fbe8dcSKarl Rupp 12400cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * partial_dim; 12419566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1242b3a17365SAmlan Barua #else 1243b3a17365SAmlan Barua temp = pdim[ndim - 1]; 124426fbe8dcSKarl Rupp 1245b3a17365SAmlan Barua pdim[ndim - 1] = temp / 2 + 1; 124626fbe8dcSKarl Rupp 1247c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 124826fbe8dcSKarl Rupp 12490cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp; 12500cf2e031SBarry Smith N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp); 125126fbe8dcSKarl Rupp 1252b3a17365SAmlan Barua pdim[ndim - 1] = temp; 125326fbe8dcSKarl Rupp 12549566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1)); 1255b3a17365SAmlan Barua #endif 1256b2b77a04SHong Zhang break; 1257b2b77a04SHong Zhang } 12580cf2e031SBarry Smith #endif 1259b2b77a04SHong Zhang } 12602277172eSMark Adams free(pdim); 12619566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW)); 12624dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&fftw)); 1263b2b77a04SHong Zhang fft->data = (void *)fftw; 1264b2b77a04SHong Zhang 12650c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1266e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 126726fbe8dcSKarl Rupp 12689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw))); 12698c1d5ab3SHong Zhang if (size == 1) { 1270a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1271a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim); 1272a1b6d50cSHong Zhang #else 12738c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim); 1274a1b6d50cSHong Zhang #endif 12758c1d5ab3SHong Zhang } 127626fbe8dcSKarl Rupp 1277b1301b2eSHong Zhang for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr]; 1278c92418ccSAmlan Barua 1279f4259b30SLisandro Dalcin fftw->p_forward = NULL; 1280f4259b30SLisandro Dalcin fftw->p_backward = NULL; 1281b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1282b2b77a04SHong Zhang 1283b2b77a04SHong Zhang if (size == 1) { 1284b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1285b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 12860cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 1287b2b77a04SHong Zhang } else { 1288b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1289b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 12900cf2e031SBarry Smith #endif 1291b2b77a04SHong Zhang } 1292b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1293b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12944a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 129526fbe8dcSKarl Rupp 12969566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW)); 12979566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW)); 12989566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW)); 1299b2b77a04SHong Zhang 1300b2b77a04SHong Zhang /* get runtime options */ 1301d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat"); 13029566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg)); 13035f80ce2aSJacob Faibussowitsch if (flg) fftw->p_flag = iplans[p_flag]; 1304d0609cedSBarry Smith PetscOptionsEnd(); 13053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1306b2b77a04SHong Zhang } 1307