1b2b77a04SHong Zhang /* 2b2b77a04SHong Zhang Provides an interface to the FFTW package. 3c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 4b2b77a04SHong Zhang */ 5b2b77a04SHong Zhang 6c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 7b2b77a04SHong Zhang EXTERN_C_BEGIN 80cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 9c6db04a5SJed Brown #include <fftw3-mpi.h> 100cf2e031SBarry Smith #else 110cf2e031SBarry Smith #include <fftw3.h> 120cf2e031SBarry Smith #endif 13b2b77a04SHong Zhang EXTERN_C_END 14b2b77a04SHong Zhang 15b2b77a04SHong Zhang typedef struct { 16b9ae062cSHong Zhang ptrdiff_t ndim_fftw, *dim_fftw; 17a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 18a1b6d50cSHong Zhang fftw_iodim64 *iodims; 19a1b6d50cSHong Zhang #else 208c1d5ab3SHong Zhang fftw_iodim *iodims; 21a1b6d50cSHong Zhang #endif 22e5d7f247SAmlan Barua PetscInt partial_dim; 23b2b77a04SHong Zhang fftw_plan p_forward, p_backward; 24b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 25*e8bdd179SBarry Smith PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */ 26b2b77a04SHong Zhang } Mat_FFTW; 27b2b77a04SHong Zhang 28b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec); 29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec); 300cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat); 310cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *); 320cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 33b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec); 34b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec); 35b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 360cf2e031SBarry Smith #endif 37b2b77a04SHong Zhang 38d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y) 39d71ae5a4SJacob Faibussowitsch { 40b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 41b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 42f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 43f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 44*e8bdd179SBarry Smith Vec xx; 451acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 46a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 47a1b6d50cSHong Zhang fftw_iodim64 *iodims; 48a1b6d50cSHong Zhang #else 498c1d5ab3SHong Zhang fftw_iodim *iodims; 50a1b6d50cSHong Zhang #endif 511acd80e4SHong Zhang PetscInt i; 521acd80e4SHong Zhang #endif 531acd80e4SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim; 54b2b77a04SHong Zhang 55b2b77a04SHong Zhang PetscFunctionBegin; 56*e8bdd179SBarry Smith if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) { 57*e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 58*e8bdd179SBarry Smith PetscCall(VecDuplicate(x, &xx)); 59*e8bdd179SBarry Smith PetscCall(VecGetArrayRead(xx, &x_array)); 60*e8bdd179SBarry Smith } else { 619566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 62*e8bdd179SBarry Smith } 639566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 646aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */ 65b2b77a04SHong Zhang switch (ndim) { 66b2b77a04SHong Zhang case 1: 6758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 68b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 6958a912c5SAmlan Barua #else 7058a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 7158a912c5SAmlan Barua #endif 72b2b77a04SHong Zhang break; 73b2b77a04SHong Zhang case 2: 7458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 75b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 7658a912c5SAmlan Barua #else 7758a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 7858a912c5SAmlan Barua #endif 79b2b77a04SHong Zhang break; 80b2b77a04SHong Zhang case 3: 8158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 82b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 8358a912c5SAmlan Barua #else 8458a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 8558a912c5SAmlan Barua #endif 86b2b77a04SHong Zhang break; 87b2b77a04SHong Zhang default: 8858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 89a1b6d50cSHong Zhang iodims = fftw->iodims; 90a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 918c1d5ab3SHong Zhang if (ndim) { 92a1b6d50cSHong Zhang iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1]; 938c1d5ab3SHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 948c1d5ab3SHong Zhang for (i = ndim - 2; i >= 0; --i) { 95a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 968c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 978c1d5ab3SHong Zhang } 988c1d5ab3SHong Zhang } 99a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 100a1b6d50cSHong Zhang #else 101a1b6d50cSHong Zhang if (ndim) { 102a1b6d50cSHong Zhang iodims[ndim - 1].n = (int)dim[ndim - 1]; 103a1b6d50cSHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 104a1b6d50cSHong Zhang for (i = ndim - 2; i >= 0; --i) { 105a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 106a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 107a1b6d50cSHong Zhang } 108a1b6d50cSHong Zhang } 109a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 110a1b6d50cSHong Zhang #endif 111a1b6d50cSHong Zhang 11258a912c5SAmlan Barua #else 113a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 11458a912c5SAmlan Barua #endif 115b2b77a04SHong Zhang break; 116b2b77a04SHong Zhang } 117*e8bdd179SBarry Smith if (fftw->p_flag != FFTW_ESTIMATE) { 118*e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 119*e8bdd179SBarry Smith PetscCall(VecRestoreArrayRead(xx, &x_array)); 120*e8bdd179SBarry Smith PetscCall(VecDestroy(&xx)); 121*e8bdd179SBarry Smith PetscCall(VecGetArrayRead(x, &x_array)); 122*e8bdd179SBarry Smith } else { 123f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array; 124b2b77a04SHong Zhang fftw->foutarray = y_array; 125*e8bdd179SBarry Smith } 126*e8bdd179SBarry Smith } 127*e8bdd179SBarry Smith 128b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 1297e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 130b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 1317e4bc134SDominic Meiser #else 1327e4bc134SDominic Meiser fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array); 1337e4bc134SDominic Meiser #endif 134b2b77a04SHong Zhang } else { 135b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 136b2b77a04SHong Zhang } 1379566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 1389566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 1393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 140b2b77a04SHong Zhang } 141b2b77a04SHong Zhang 142d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y) 143d71ae5a4SJacob Faibussowitsch { 144b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 145b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 146f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 147f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 148b2b77a04SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim; 149*e8bdd179SBarry Smith Vec xx; 1501acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 151a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 152a1b6d50cSHong Zhang fftw_iodim64 *iodims = fftw->iodims; 153a1b6d50cSHong Zhang #else 154a1b6d50cSHong Zhang fftw_iodim *iodims = fftw->iodims; 155a1b6d50cSHong Zhang #endif 1561acd80e4SHong Zhang #endif 157b2b77a04SHong Zhang 158b2b77a04SHong Zhang PetscFunctionBegin; 159*e8bdd179SBarry Smith if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) { 160*e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 161*e8bdd179SBarry Smith PetscCall(VecDuplicate(x, &xx)); 162*e8bdd179SBarry Smith PetscCall(VecGetArrayRead(xx, &x_array)); 163*e8bdd179SBarry Smith } else { 1649566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 165*e8bdd179SBarry Smith } 1669566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 1676aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */ 168b2b77a04SHong Zhang switch (ndim) { 169b2b77a04SHong Zhang case 1: 17058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 171b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 17258a912c5SAmlan Barua #else 173e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 17458a912c5SAmlan Barua #endif 175b2b77a04SHong Zhang break; 176b2b77a04SHong Zhang case 2: 17758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 178b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 17958a912c5SAmlan Barua #else 180e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 18158a912c5SAmlan Barua #endif 182b2b77a04SHong Zhang break; 183b2b77a04SHong Zhang case 3: 18458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 185b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 18658a912c5SAmlan Barua #else 187e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 18858a912c5SAmlan Barua #endif 189b2b77a04SHong Zhang break; 190b2b77a04SHong Zhang default: 19158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 192a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 193a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 194a1b6d50cSHong Zhang #else 1958c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 196a1b6d50cSHong Zhang #endif 19758a912c5SAmlan Barua #else 198a31b9140SHong Zhang fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 19958a912c5SAmlan Barua #endif 200b2b77a04SHong Zhang break; 201b2b77a04SHong Zhang } 202*e8bdd179SBarry Smith if (fftw->p_flag != FFTW_ESTIMATE) { 203*e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 204*e8bdd179SBarry Smith PetscCall(VecRestoreArrayRead(xx, &x_array)); 205*e8bdd179SBarry Smith PetscCall(VecDestroy(&xx)); 206*e8bdd179SBarry Smith PetscCall(VecGetArrayRead(x, &x_array)); 207*e8bdd179SBarry Smith } else { 208f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array; 209b2b77a04SHong Zhang fftw->boutarray = y_array; 210*e8bdd179SBarry Smith } 211*e8bdd179SBarry Smith } 212b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 2137e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 214b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 2157e4bc134SDominic Meiser #else 2167e4bc134SDominic Meiser fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array); 2177e4bc134SDominic Meiser #endif 218b2b77a04SHong Zhang } else { 2192f613bf5SBarry Smith fftw_execute(fftw->p_backward); 220b2b77a04SHong Zhang } 2219566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 2229566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 2233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 224b2b77a04SHong Zhang } 225b2b77a04SHong Zhang 2260cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 227d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y) 228d71ae5a4SJacob Faibussowitsch { 229b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 230b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 231f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 232f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 233c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim; 234ce94432eSBarry Smith MPI_Comm comm; 235*e8bdd179SBarry Smith Vec xx; 236b2b77a04SHong Zhang 237b2b77a04SHong Zhang PetscFunctionBegin; 2389566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 239*e8bdd179SBarry Smith if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) { 240*e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 241*e8bdd179SBarry Smith PetscCall(VecDuplicate(x, &xx)); 242*e8bdd179SBarry Smith PetscCall(VecGetArrayRead(xx, &x_array)); 243*e8bdd179SBarry Smith } else { 2449566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 245*e8bdd179SBarry Smith } 2469566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 2476aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */ 248b2b77a04SHong Zhang switch (ndim) { 249b2b77a04SHong Zhang case 1: 2503c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 251b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 252ae0a50aaSHong Zhang #else 2534f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 2543c3a9c75SAmlan Barua #endif 255b2b77a04SHong Zhang break; 256b2b77a04SHong Zhang case 2: 25797504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 258b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2593c3a9c75SAmlan Barua #else 2603c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2613c3a9c75SAmlan Barua #endif 262b2b77a04SHong Zhang break; 263b2b77a04SHong Zhang case 3: 2643c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 265b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2663c3a9c75SAmlan Barua #else 26751d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2683c3a9c75SAmlan Barua #endif 269b2b77a04SHong Zhang break; 270b2b77a04SHong Zhang default: 2713c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 272c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2733c3a9c75SAmlan Barua #else 2743c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2753c3a9c75SAmlan Barua #endif 276b2b77a04SHong Zhang break; 277b2b77a04SHong Zhang } 278*e8bdd179SBarry Smith if (fftw->p_flag != FFTW_ESTIMATE) { 279*e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 280*e8bdd179SBarry Smith PetscCall(VecRestoreArrayRead(xx, &x_array)); 281*e8bdd179SBarry Smith PetscCall(VecDestroy(&xx)); 282*e8bdd179SBarry Smith PetscCall(VecGetArrayRead(x, &x_array)); 283*e8bdd179SBarry Smith } else { 284f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array; 285b2b77a04SHong Zhang fftw->foutarray = y_array; 286*e8bdd179SBarry Smith } 287*e8bdd179SBarry Smith } 288b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 289b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 290b2b77a04SHong Zhang } else { 291b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 292b2b77a04SHong Zhang } 2939566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 2949566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 2953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 296b2b77a04SHong Zhang } 297b2b77a04SHong Zhang 298d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y) 299d71ae5a4SJacob Faibussowitsch { 300b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 301b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 302f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 303f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 304c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim; 305ce94432eSBarry Smith MPI_Comm comm; 306*e8bdd179SBarry Smith Vec xx; 307b2b77a04SHong Zhang 308b2b77a04SHong Zhang PetscFunctionBegin; 3099566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 310*e8bdd179SBarry Smith if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) { 311*e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 312*e8bdd179SBarry Smith PetscCall(VecDuplicate(x, &xx)); 313*e8bdd179SBarry Smith PetscCall(VecGetArrayRead(xx, &x_array)); 314*e8bdd179SBarry Smith } else { 3159566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 316*e8bdd179SBarry Smith } 3179566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 3186aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */ 319b2b77a04SHong Zhang switch (ndim) { 320b2b77a04SHong Zhang case 1: 3213c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 322b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 323ae0a50aaSHong Zhang #else 3244f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 3253c3a9c75SAmlan Barua #endif 326b2b77a04SHong Zhang break; 327b2b77a04SHong Zhang case 2: 32897504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 329b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3303c3a9c75SAmlan Barua #else 3313c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3323c3a9c75SAmlan Barua #endif 333b2b77a04SHong Zhang break; 334b2b77a04SHong Zhang case 3: 3353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 336b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3373c3a9c75SAmlan Barua #else 3383c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3393c3a9c75SAmlan Barua #endif 340b2b77a04SHong Zhang break; 341b2b77a04SHong Zhang default: 3423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 343c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3443c3a9c75SAmlan Barua #else 3453c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3463c3a9c75SAmlan Barua #endif 347b2b77a04SHong Zhang break; 348b2b77a04SHong Zhang } 349*e8bdd179SBarry Smith if (fftw->p_flag != FFTW_ESTIMATE) { 350*e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 351*e8bdd179SBarry Smith PetscCall(VecRestoreArrayRead(xx, &x_array)); 352*e8bdd179SBarry Smith PetscCall(VecDestroy(&xx)); 353*e8bdd179SBarry Smith PetscCall(VecGetArrayRead(x, &x_array)); 354*e8bdd179SBarry Smith } else { 355f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array; 356b2b77a04SHong Zhang fftw->boutarray = y_array; 357*e8bdd179SBarry Smith } 358*e8bdd179SBarry Smith } 359b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 360b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 361b2b77a04SHong Zhang } else { 3622f613bf5SBarry Smith fftw_execute(fftw->p_backward); 363b2b77a04SHong Zhang } 3649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 3659566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 3663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 367b2b77a04SHong Zhang } 3680cf2e031SBarry Smith #endif 369b2b77a04SHong Zhang 370d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_FFTW(Mat A) 371d71ae5a4SJacob Faibussowitsch { 372b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 373b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 374b2b77a04SHong Zhang 375b2b77a04SHong Zhang PetscFunctionBegin; 376b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 377b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 378ad540459SPierre Jolivet if (fftw->iodims) free(fftw->iodims); 3799566063dSJacob Faibussowitsch PetscCall(PetscFree(fftw->dim_fftw)); 3809566063dSJacob Faibussowitsch PetscCall(PetscFree(fft->data)); 3812e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL)); 3822e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL)); 3832e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL)); 3840cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 3856ccf0b3eSHong Zhang fftw_mpi_cleanup(); 3860cf2e031SBarry Smith #endif 3873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 388b2b77a04SHong Zhang } 389b2b77a04SHong Zhang 3900cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 391c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 392d71ae5a4SJacob Faibussowitsch PetscErrorCode VecDestroy_MPIFFTW(Vec v) 393d71ae5a4SJacob Faibussowitsch { 394b2b77a04SHong Zhang PetscScalar *array; 395b2b77a04SHong Zhang 396b2b77a04SHong Zhang PetscFunctionBegin; 3979566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array)); 3982f613bf5SBarry Smith fftw_free((fftw_complex *)array); 3999566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array)); 4009566063dSJacob Faibussowitsch PetscCall(VecDestroy_MPI(v)); 4013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 402b2b77a04SHong Zhang } 4030cf2e031SBarry Smith #endif 404b2b77a04SHong Zhang 4050cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 406d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) 407d71ae5a4SJacob Faibussowitsch { 4085b113e21Ss-sajid-ali Mat A; 4095b113e21Ss-sajid-ali 4105b113e21Ss-sajid-ali PetscFunctionBegin; 4119566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A)); 4129566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL)); 4133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4145b113e21Ss-sajid-ali } 4155b113e21Ss-sajid-ali 416d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) 417d71ae5a4SJacob Faibussowitsch { 4185b113e21Ss-sajid-ali Mat A; 4195b113e21Ss-sajid-ali 4205b113e21Ss-sajid-ali PetscFunctionBegin; 4219566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A)); 4229566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL)); 4233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4245b113e21Ss-sajid-ali } 4255b113e21Ss-sajid-ali 426d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 427d71ae5a4SJacob Faibussowitsch { 4285b113e21Ss-sajid-ali Mat A; 4295b113e21Ss-sajid-ali 4305b113e21Ss-sajid-ali PetscFunctionBegin; 4319566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A)); 4329566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new)); 4333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4345b113e21Ss-sajid-ali } 4350cf2e031SBarry Smith #endif 4365b113e21Ss-sajid-ali 4374be45526SHong Zhang /*@ 4382a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 43911a5261eSBarry Smith parallel layout determined by `MATFFTW` 4404f7415efSAmlan Barua 441c3339decSBarry Smith Collective 4424f7415efSAmlan Barua 4434f7415efSAmlan Barua Input Parameter: 4443564f093SHong Zhang . A - the matrix 4454f7415efSAmlan Barua 446d8d19677SJose E. Roman Output Parameters: 447cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 4486b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW 449cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4504f7415efSAmlan Barua 4512ef1f0ffSBarry Smith Options Database Key: 4522ef1f0ffSBarry Smith . -mat_fftw_plannerflags - set FFTW planner flags 4532ef1f0ffSBarry Smith 4544f7415efSAmlan Barua Level: advanced 4553564f093SHong Zhang 45611a5261eSBarry Smith Notes: 45711a5261eSBarry Smith The parallel layout of output of forward FFTW is always same as the input 45897504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 45997504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 46011a5261eSBarry Smith 46111a5261eSBarry Smith We need to provide enough space while doing parallel real transform. 46297504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 46397504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 464e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 465e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 466e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 46711a5261eSBarry Smith 468e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 469e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 470e0ec536eSAmlan Barua each processor and returns that. 4714f7415efSAmlan Barua 472fe59aa6dSJacob Faibussowitsch Developer Notes: 47311a5261eSBarry Smith How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically? 47411a5261eSBarry Smith 4751cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()` 4764be45526SHong Zhang @*/ 477d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) 478d71ae5a4SJacob Faibussowitsch { 4794be45526SHong Zhang PetscFunctionBegin; 480cac4c232SBarry Smith PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z)); 4813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4824be45526SHong Zhang } 4834be45526SHong Zhang 484d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) 485d71ae5a4SJacob Faibussowitsch { 4864f7415efSAmlan Barua PetscMPIInt size, rank; 487ce94432eSBarry Smith MPI_Comm comm; 4884f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 4894f7415efSAmlan Barua 4904f7415efSAmlan Barua PetscFunctionBegin; 4914f7415efSAmlan Barua PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4924f7415efSAmlan Barua PetscValidType(A, 1); 4939566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 4944f7415efSAmlan Barua 4959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 4969566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4974f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4984f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4999566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin)); 5009566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout)); 5019566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout)); 5024f7415efSAmlan Barua #else 5039566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin)); 5049566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout)); 5059566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout)); 5064f7415efSAmlan Barua #endif 5070cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 50897504071SAmlan Barua } else { /* parallel cases */ 5090cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 5100cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 5114f7415efSAmlan Barua ptrdiff_t alloc_local, local_n0, local_0_start; 5129496c9aeSAmlan Barua ptrdiff_t local_n1; 5139496c9aeSAmlan Barua fftw_complex *data_fout; 5149496c9aeSAmlan Barua ptrdiff_t local_1_start; 5159496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 5169496c9aeSAmlan Barua fftw_complex *data_fin, *data_bout; 5179496c9aeSAmlan Barua #else 5184f7415efSAmlan Barua double *data_finr, *data_boutr; 5196ccf0b3eSHong Zhang PetscInt n1, N1; 5209496c9aeSAmlan Barua ptrdiff_t temp; 5219496c9aeSAmlan Barua #endif 5229496c9aeSAmlan Barua 5234f7415efSAmlan Barua switch (ndim) { 5244f7415efSAmlan Barua case 1: 52557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5264f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform"); 52757625b84SAmlan Barua #else 52857625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 52957625b84SAmlan Barua if (fin) { 53057625b84SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5319566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin)); 5329566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5335b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 53457625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 53557625b84SAmlan Barua } 53657625b84SAmlan Barua if (fout) { 53757625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5389566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout)); 5399566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5405b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 54157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 54257625b84SAmlan Barua } 54357625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 54457625b84SAmlan Barua if (bout) { 54557625b84SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5469566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout)); 5479566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5485b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 54957625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 55057625b84SAmlan Barua } 55157625b84SAmlan Barua break; 55257625b84SAmlan Barua #endif 5534f7415efSAmlan Barua case 2: 55497504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 5554f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 5569371c9d4SSatish Balay N1 = 2 * dim[0] * (dim[1] / 2 + 1); 5579371c9d4SSatish Balay n1 = 2 * local_n0 * (dim[1] / 2 + 1); 5584f7415efSAmlan Barua if (fin) { 5594f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5609566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 5619566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5625b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5634f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5644f7415efSAmlan Barua } 56557625b84SAmlan Barua if (fout) { 56657625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5679566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout)); 5689566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5695b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 57057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 57157625b84SAmlan Barua } 5725b113e21Ss-sajid-ali if (bout) { 5735b113e21Ss-sajid-ali data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5749566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 5759566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5765b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5775b113e21Ss-sajid-ali (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5785b113e21Ss-sajid-ali } 5794f7415efSAmlan Barua #else 5804f7415efSAmlan Barua /* Get local size */ 5814f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 5824f7415efSAmlan Barua if (fin) { 5834f7415efSAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5849566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 5859566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5865b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5874f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5884f7415efSAmlan Barua } 5894f7415efSAmlan Barua if (fout) { 5904f7415efSAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5919566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 5929566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5935b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 5944f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5954f7415efSAmlan Barua } 5964f7415efSAmlan Barua if (bout) { 5974f7415efSAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5989566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 5999566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6005b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6014f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6024f7415efSAmlan Barua } 6034f7415efSAmlan Barua #endif 6044f7415efSAmlan Barua break; 6054f7415efSAmlan Barua case 3: 6064f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6074f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 6089371c9d4SSatish Balay N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1); 6099371c9d4SSatish Balay n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1); 6104f7415efSAmlan Barua if (fin) { 6114f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6129566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 6139566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6145b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6154f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6164f7415efSAmlan Barua } 6175b113e21Ss-sajid-ali if (fout) { 6185b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6199566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout)); 6209566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6215b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6225b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6235b113e21Ss-sajid-ali } 6244f7415efSAmlan Barua if (bout) { 6254f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6269566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 6279566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6285b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6294f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6304f7415efSAmlan Barua } 6314f7415efSAmlan Barua #else 6320c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 6330c9b18e4SAmlan Barua if (fin) { 6340c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6359566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 6369566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6375b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6380c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6390c9b18e4SAmlan Barua } 6400c9b18e4SAmlan Barua if (fout) { 6410c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6429566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 6439566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6445b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6450c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6460c9b18e4SAmlan Barua } 6470c9b18e4SAmlan Barua if (bout) { 6480c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6499566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 6509566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6515b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6520c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6530c9b18e4SAmlan Barua } 6544f7415efSAmlan Barua #endif 6554f7415efSAmlan Barua break; 6564f7415efSAmlan Barua default: 6574f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6584f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 6594f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 6604f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 661f4f49eeaSPierre Jolivet N1 = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp); 6624f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 6634f7415efSAmlan Barua 6644f7415efSAmlan Barua if (fin) { 6654f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6669566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin)); 6679566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6685b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6694f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6704f7415efSAmlan Barua } 6715b113e21Ss-sajid-ali if (fout) { 6725b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6739566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout)); 6749566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6755b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6765b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6775b113e21Ss-sajid-ali } 6784f7415efSAmlan Barua if (bout) { 6794f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6809566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout)); 6819566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6825b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6839496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6844f7415efSAmlan Barua } 6854f7415efSAmlan Barua #else 6860c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 6870c9b18e4SAmlan Barua if (fin) { 6880c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6899566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 6909566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6915b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6920c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6930c9b18e4SAmlan Barua } 6940c9b18e4SAmlan Barua if (fout) { 6950c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6969566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 6979566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6985b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6990c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7000c9b18e4SAmlan Barua } 7010c9b18e4SAmlan Barua if (bout) { 7020c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 7039566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 7049566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 7055b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 7060c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 7070c9b18e4SAmlan Barua } 7084f7415efSAmlan Barua #endif 7094f7415efSAmlan Barua break; 7104f7415efSAmlan Barua } 711f9d91177SJunchao Zhang /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 712f9d91177SJunchao Zhang v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 713da81f932SPierre Jolivet memory leaks. We void these pointers here to avoid accident uses. 714f9d91177SJunchao Zhang */ 715f9d91177SJunchao Zhang if (fin) (*fin)->ops->replacearray = NULL; 716f9d91177SJunchao Zhang if (fout) (*fout)->ops->replacearray = NULL; 717f9d91177SJunchao Zhang if (bout) (*bout)->ops->replacearray = NULL; 7180cf2e031SBarry Smith #endif 7194f7415efSAmlan Barua } 7203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7214f7415efSAmlan Barua } 7224f7415efSAmlan Barua 7233564f093SHong Zhang /*@ 72411a5261eSBarry Smith VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls. 72554efbe56SHong Zhang 726c3339decSBarry Smith Collective 7273564f093SHong Zhang 7283564f093SHong Zhang Input Parameters: 7293564f093SHong Zhang + A - FFTW matrix 7303564f093SHong Zhang - x - the PETSc vector 7313564f093SHong Zhang 7322fe279fdSBarry Smith Output Parameter: 7333564f093SHong Zhang . y - the FFTW vector 7343564f093SHong Zhang 735b2b77a04SHong Zhang Level: intermediate 7363564f093SHong Zhang 73711a5261eSBarry Smith Note: 73811a5261eSBarry Smith For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 73997504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 74097504071SAmlan Barua zeros. This routine does that job by scattering operation. 74197504071SAmlan Barua 7421cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()` 7433564f093SHong Zhang @*/ 744d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y) 745d71ae5a4SJacob Faibussowitsch { 7463564f093SHong Zhang PetscFunctionBegin; 747cac4c232SBarry Smith PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y)); 7483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7493564f093SHong Zhang } 7503c3a9c75SAmlan Barua 75166976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y) 752d71ae5a4SJacob Faibussowitsch { 753ce94432eSBarry Smith MPI_Comm comm; 7543c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 7559496c9aeSAmlan Barua PetscInt low; 7567a21806cSHong Zhang PetscMPIInt rank, size; 7577a21806cSHong Zhang PetscInt vsize, vsize1; 7583c3a9c75SAmlan Barua VecScatter vecscat; 7590cf2e031SBarry Smith IS list1; 7603c3a9c75SAmlan Barua 7613564f093SHong Zhang PetscFunctionBegin; 7629566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 7639566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 7649566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7659566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(y, &low, NULL)); 7663c3a9c75SAmlan Barua 7673564f093SHong Zhang if (size == 1) { 7689566063dSJacob Faibussowitsch PetscCall(VecGetSize(x, &vsize)); 7699566063dSJacob Faibussowitsch PetscCall(VecGetSize(y, &vsize1)); 7709566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1)); 7719566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 7729566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7739566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7749566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7759566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7760cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 7773564f093SHong Zhang } else { 7780cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 7790cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 7800cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start; 7810cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start; 7820cf2e031SBarry Smith IS list2; 7830cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 7840cf2e031SBarry Smith PetscInt i, j, k, partial_dim; 7850cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 7860cf2e031SBarry Smith PetscInt NM; 7870cf2e031SBarry Smith ptrdiff_t temp; 7880cf2e031SBarry Smith #endif 7890cf2e031SBarry Smith 7903c3a9c75SAmlan Barua switch (ndim) { 7913c3a9c75SAmlan Barua case 1: 79264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7937a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 79426fbe8dcSKarl Rupp 7959566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1)); 7969566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2)); 7979566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 7989566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7999566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8009566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8019566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8029566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 80364657d84SAmlan Barua #else 804e5d7f247SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 80564657d84SAmlan Barua #endif 8063c3a9c75SAmlan Barua break; 8073c3a9c75SAmlan Barua case 2: 808bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8097a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 81026fbe8dcSKarl Rupp 8119566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 8129566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 8139566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8149566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8159566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8169566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8179566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8189566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 819bd59e6c6SAmlan Barua #else 820c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 82126fbe8dcSKarl Rupp 82257508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1)); 82357508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2)); 8243c3a9c75SAmlan Barua 8253564f093SHong Zhang if (dim[1] % 2 == 0) { 8263c3a9c75SAmlan Barua NM = dim[1] + 2; 8273564f093SHong Zhang } else { 8283c3a9c75SAmlan Barua NM = dim[1] + 1; 8293564f093SHong Zhang } 8303c3a9c75SAmlan Barua for (i = 0; i < local_n0; i++) { 8313c3a9c75SAmlan Barua for (j = 0; j < dim[1]; j++) { 8323c3a9c75SAmlan Barua tempindx = i * dim[1] + j; 8333c3a9c75SAmlan Barua tempindx1 = i * NM + j; 83426fbe8dcSKarl Rupp 8355b3e41ffSAmlan Barua indx1[tempindx] = local_0_start * dim[1] + tempindx; 8363c3a9c75SAmlan Barua indx2[tempindx] = low + tempindx1; 8373c3a9c75SAmlan Barua } 8383c3a9c75SAmlan Barua } 8393c3a9c75SAmlan Barua 8409566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 8419566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 8423c3a9c75SAmlan Barua 8439566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8449566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8459566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8469566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8479566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8489566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8499566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8509566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 851bd59e6c6SAmlan Barua #endif 8529496c9aeSAmlan Barua break; 8533c3a9c75SAmlan Barua 8543c3a9c75SAmlan Barua case 3: 855bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8567a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 85726fbe8dcSKarl Rupp 8589566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 8599566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 8609566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8619566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8629566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8639566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8649566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8659566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 866bd59e6c6SAmlan Barua #else 867c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 868f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform"); 8697a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 87026fbe8dcSKarl Rupp 87157508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1)); 87257508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2)); 87351d1eed7SAmlan Barua 874db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2; 875db4deed7SKarl Rupp else NM = dim[2] + 1; 87651d1eed7SAmlan Barua 87751d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) { 87851d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) { 87951d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) { 88051d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 88151d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k; 88226fbe8dcSKarl Rupp 88351d1eed7SAmlan Barua indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 88451d1eed7SAmlan Barua indx2[tempindx] = low + tempindx1; 88551d1eed7SAmlan Barua } 88651d1eed7SAmlan Barua } 88751d1eed7SAmlan Barua } 88851d1eed7SAmlan Barua 8899566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 8909566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 8919566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8929566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8939566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8949566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8959566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8969566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8979566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8989566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 899bd59e6c6SAmlan Barua #endif 9009496c9aeSAmlan Barua break; 9013c3a9c75SAmlan Barua 9023c3a9c75SAmlan Barua default: 903bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9047a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 90526fbe8dcSKarl Rupp 9069566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 9079566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 9089566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 9099566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9109566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9119566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9129566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 9139566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 914bd59e6c6SAmlan Barua #else 915c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 916f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform"); 917e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 91826fbe8dcSKarl Rupp 919e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 92026fbe8dcSKarl Rupp 9217a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 92226fbe8dcSKarl Rupp 923e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 924e5d7f247SAmlan Barua 925e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 926e5d7f247SAmlan Barua 92757508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1)); 92857508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2)); 929e5d7f247SAmlan Barua 930db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2; 931db4deed7SKarl Rupp else NM = 1; 932e5d7f247SAmlan Barua 9336971673cSAmlan Barua j = low; 93457508eceSPierre Jolivet for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) { 9356971673cSAmlan Barua indx1[i] = local_0_start * partial_dim + i; 9366971673cSAmlan Barua indx2[i] = j; 93726fbe8dcSKarl Rupp if (k % dim[ndim - 1] == 0) j += NM; 9386971673cSAmlan Barua j++; 9396971673cSAmlan Barua } 9409566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 9419566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 9429566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 9439566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9449566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9459566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9469566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 9479566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 9489566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 9499566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 950bd59e6c6SAmlan Barua #endif 9519496c9aeSAmlan Barua break; 9523c3a9c75SAmlan Barua } 9530cf2e031SBarry Smith #endif 954e81bb599SAmlan Barua } 9553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9563c3a9c75SAmlan Barua } 9573c3a9c75SAmlan Barua 9583564f093SHong Zhang /*@ 95911a5261eSBarry Smith VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector. 9603564f093SHong Zhang 961c3339decSBarry Smith Collective 9623564f093SHong Zhang 9633564f093SHong Zhang Input Parameters: 96411a5261eSBarry Smith + A - `MATFFTW` matrix 9653564f093SHong Zhang - x - FFTW vector 9663564f093SHong Zhang 9672fe279fdSBarry Smith Output Parameter: 9683564f093SHong Zhang . y - PETSc vector 9693564f093SHong Zhang 9703564f093SHong Zhang Level: intermediate 9713564f093SHong Zhang 97211a5261eSBarry Smith Note: 97311a5261eSBarry Smith While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 97411a5261eSBarry Smith `VecScatterFFTWToPetsc()` removes those extra zeros. 9753564f093SHong Zhang 9761cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()` 9773564f093SHong Zhang @*/ 978d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y) 979d71ae5a4SJacob Faibussowitsch { 9803c3a9c75SAmlan Barua PetscFunctionBegin; 981cac4c232SBarry Smith PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y)); 9823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9833c3a9c75SAmlan Barua } 98454efbe56SHong Zhang 98566976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y) 986d71ae5a4SJacob Faibussowitsch { 987ce94432eSBarry Smith MPI_Comm comm; 9885b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 9899496c9aeSAmlan Barua PetscInt low; 9907a21806cSHong Zhang PetscMPIInt rank, size; 9915b3e41ffSAmlan Barua VecScatter vecscat; 9920cf2e031SBarry Smith IS list1; 9939496c9aeSAmlan Barua 9943564f093SHong Zhang PetscFunctionBegin; 9959566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9969566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 9979566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 9989566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x, &low, NULL)); 9995b3e41ffSAmlan Barua 1000e81bb599SAmlan Barua if (size == 1) { 10019566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1)); 10029566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 10039566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10049566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10059566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10069566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 1007e81bb599SAmlan Barua 10080cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 10093564f093SHong Zhang } else { 10100cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 10110cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 10120cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start; 10130cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start; 10140cf2e031SBarry Smith IS list2; 10150cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 10160cf2e031SBarry Smith PetscInt i, j, k, partial_dim; 10170cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 10180cf2e031SBarry Smith PetscInt NM; 10190cf2e031SBarry Smith ptrdiff_t temp; 10200cf2e031SBarry Smith #endif 1021e81bb599SAmlan Barua switch (ndim) { 1022e81bb599SAmlan Barua case 1: 102364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10247a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 102526fbe8dcSKarl Rupp 10269566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1)); 10279566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2)); 10289566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 10299566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10309566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10319566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10329566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10339566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 103464657d84SAmlan Barua #else 10356a506ed8SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT"); 103664657d84SAmlan Barua #endif 10375b3e41ffSAmlan Barua break; 10385b3e41ffSAmlan Barua case 2: 1039bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10407a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 104126fbe8dcSKarl Rupp 10429566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 10439566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 10449566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10459566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10469566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10479566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10489566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10499566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1050bd59e6c6SAmlan Barua #else 10517a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 105226fbe8dcSKarl Rupp 105357508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1)); 105457508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2)); 10555b3e41ffSAmlan Barua 1056db4deed7SKarl Rupp if (dim[1] % 2 == 0) NM = dim[1] + 2; 1057db4deed7SKarl Rupp else NM = dim[1] + 1; 10585b3e41ffSAmlan Barua 10595b3e41ffSAmlan Barua for (i = 0; i < local_n0; i++) { 10605b3e41ffSAmlan Barua for (j = 0; j < dim[1]; j++) { 10615b3e41ffSAmlan Barua tempindx = i * dim[1] + j; 10625b3e41ffSAmlan Barua tempindx1 = i * NM + j; 106326fbe8dcSKarl Rupp 10645b3e41ffSAmlan Barua indx1[tempindx] = local_0_start * dim[1] + tempindx; 10655b3e41ffSAmlan Barua indx2[tempindx] = low + tempindx1; 10665b3e41ffSAmlan Barua } 10675b3e41ffSAmlan Barua } 10685b3e41ffSAmlan Barua 10699566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 10709566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 10715b3e41ffSAmlan Barua 10729566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10739566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10749566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10759566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10769566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10779566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 10789566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 10799566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1080bd59e6c6SAmlan Barua #endif 10819496c9aeSAmlan Barua break; 10825b3e41ffSAmlan Barua case 3: 1083bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10847a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 108526fbe8dcSKarl Rupp 10869566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 10879566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 10889566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 10899566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10909566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10919566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10929566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10939566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1094bd59e6c6SAmlan Barua #else 10957a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 109626fbe8dcSKarl Rupp 109757508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1)); 109857508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2)); 109951d1eed7SAmlan Barua 1100db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2; 1101db4deed7SKarl Rupp else NM = dim[2] + 1; 110251d1eed7SAmlan Barua 110351d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) { 110451d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) { 110551d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) { 110651d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 110751d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k; 110826fbe8dcSKarl Rupp 110951d1eed7SAmlan Barua indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 111051d1eed7SAmlan Barua indx2[tempindx] = low + tempindx1; 111151d1eed7SAmlan Barua } 111251d1eed7SAmlan Barua } 111351d1eed7SAmlan Barua } 111451d1eed7SAmlan Barua 11159566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 11169566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 111751d1eed7SAmlan Barua 11189566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 11199566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11209566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11219566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11229566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11239566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 11249566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 11259566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1126bd59e6c6SAmlan Barua #endif 11279496c9aeSAmlan Barua break; 11285b3e41ffSAmlan Barua default: 1129bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 11307a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 113126fbe8dcSKarl Rupp 11329566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 11339566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 11349566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 11359566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11369566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11379566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11389566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11399566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1140bd59e6c6SAmlan Barua #else 1141ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 114226fbe8dcSKarl Rupp 1143ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 114426fbe8dcSKarl Rupp 1145c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 114626fbe8dcSKarl Rupp 1147ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 1148ba6e06d0SAmlan Barua 1149ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1150ba6e06d0SAmlan Barua 115157508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1)); 115257508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2)); 1153ba6e06d0SAmlan Barua 1154db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2; 1155db4deed7SKarl Rupp else NM = 1; 1156ba6e06d0SAmlan Barua 1157ba6e06d0SAmlan Barua j = low; 115857508eceSPierre Jolivet for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) { 1159ba6e06d0SAmlan Barua indx1[i] = local_0_start * partial_dim + i; 1160ba6e06d0SAmlan Barua indx2[i] = j; 11613564f093SHong Zhang if (k % dim[ndim - 1] == 0) j += NM; 1162ba6e06d0SAmlan Barua j++; 1163ba6e06d0SAmlan Barua } 11649566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 11659566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 1166ba6e06d0SAmlan Barua 11679566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 11689566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11699566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11709566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11719566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11729566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 11739566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 11749566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1175bd59e6c6SAmlan Barua #endif 11769496c9aeSAmlan Barua break; 11775b3e41ffSAmlan Barua } 11780cf2e031SBarry Smith #endif 1179e81bb599SAmlan Barua } 11803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11815b3e41ffSAmlan Barua } 11825b3e41ffSAmlan Barua 1183350f1385SJose E. Roman /*MC 1184350f1385SJose E. Roman MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package. 1185350f1385SJose E. Roman 1186350f1385SJose E. Roman Level: intermediate 1187350f1385SJose E. Roman 11881cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()` 1189350f1385SJose E. Roman M*/ 1190d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1191d71ae5a4SJacob Faibussowitsch { 1192ce94432eSBarry Smith MPI_Comm comm; 1193b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 1194b2b77a04SHong Zhang Mat_FFTW *fftw; 11950cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 11965274ed1bSHong Zhang const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"}; 11975274ed1bSHong Zhang unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE}; 1198b2b77a04SHong Zhang PetscBool flg; 11994f48bc67SAmlan Barua PetscInt p_flag, partial_dim = 1, ctr; 120011902ff2SHong Zhang PetscMPIInt size, rank; 12019496c9aeSAmlan Barua ptrdiff_t *pdim; 12029496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12030cf2e031SBarry Smith PetscInt tot_dim; 12049496c9aeSAmlan Barua #endif 12059496c9aeSAmlan Barua 1206b2b77a04SHong Zhang PetscFunctionBegin; 12079566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 12089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 12099566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1210c92418ccSAmlan Barua 12110cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 12121878d478SAmlan Barua fftw_mpi_init(); 12130cf2e031SBarry Smith #endif 121411902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t)); 121511902ff2SHong Zhang pdim[0] = dim[0]; 12168ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12178ca4c034SAmlan Barua tot_dim = 2 * dim[0]; 12188ca4c034SAmlan Barua #endif 12193564f093SHong Zhang for (ctr = 1; ctr < ndim; ctr++) { 12206882372aSHong Zhang partial_dim *= dim[ctr]; 122111902ff2SHong Zhang pdim[ctr] = dim[ctr]; 12228ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1223db4deed7SKarl Rupp if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1); 1224db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 12258ca4c034SAmlan Barua #endif 12266882372aSHong Zhang } 12273c3a9c75SAmlan Barua 1228b2b77a04SHong Zhang if (size == 1) { 1229e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12309566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N)); 12310cf2e031SBarry Smith fft->n = fft->N; 1232e81bb599SAmlan Barua #else 12339566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim)); 12340cf2e031SBarry Smith fft->n = tot_dim; 12350cf2e031SBarry Smith #endif 12360cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 12370cf2e031SBarry Smith } else { 12380cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start, local_n1, local_1_start; 12390cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 12400cf2e031SBarry Smith ptrdiff_t temp; 12410cf2e031SBarry Smith PetscInt N1; 1242e81bb599SAmlan Barua #endif 1243e81bb599SAmlan Barua 1244b2b77a04SHong Zhang switch (ndim) { 1245b2b77a04SHong Zhang case 1: 12463c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12473c3a9c75SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 1248e5d7f247SAmlan Barua #else 12497a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 12500cf2e031SBarry Smith fft->n = (PetscInt)local_n0; 12519566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N)); 1252e5d7f247SAmlan Barua #endif 1253b2b77a04SHong Zhang break; 1254b2b77a04SHong Zhang case 2: 12555b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 12567a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 12570cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1]; 12589566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 12595b3e41ffSAmlan Barua #else 1260c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 126126fbe8dcSKarl Rupp 12620cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1); 12639566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1))); 12645b3e41ffSAmlan Barua #endif 1265b2b77a04SHong Zhang break; 1266b2b77a04SHong Zhang case 3: 126751d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12687a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 126926fbe8dcSKarl Rupp 12700cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1] * dim[2]; 12719566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 127251d1eed7SAmlan Barua #else 1273c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 127426fbe8dcSKarl Rupp 12750cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1); 12769566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1))); 127751d1eed7SAmlan Barua #endif 1278b2b77a04SHong Zhang break; 1279b2b77a04SHong Zhang default: 1280b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12817a21806cSHong Zhang fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start); 128226fbe8dcSKarl Rupp 12830cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * partial_dim; 12849566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1285b3a17365SAmlan Barua #else 1286b3a17365SAmlan Barua temp = pdim[ndim - 1]; 128726fbe8dcSKarl Rupp 1288b3a17365SAmlan Barua pdim[ndim - 1] = temp / 2 + 1; 128926fbe8dcSKarl Rupp 1290c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 129126fbe8dcSKarl Rupp 12920cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp; 12930cf2e031SBarry Smith N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp); 129426fbe8dcSKarl Rupp 1295b3a17365SAmlan Barua pdim[ndim - 1] = temp; 129626fbe8dcSKarl Rupp 12979566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1)); 1298b3a17365SAmlan Barua #endif 1299b2b77a04SHong Zhang break; 1300b2b77a04SHong Zhang } 13010cf2e031SBarry Smith #endif 1302b2b77a04SHong Zhang } 13032277172eSMark Adams free(pdim); 13049566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW)); 13054dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&fftw)); 1306b2b77a04SHong Zhang fft->data = (void *)fftw; 1307b2b77a04SHong Zhang 13080c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1309e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 131026fbe8dcSKarl Rupp 1311f4f49eeaSPierre Jolivet PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw)); 13128c1d5ab3SHong Zhang if (size == 1) { 1313a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1314a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim); 1315a1b6d50cSHong Zhang #else 13168c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim); 1317a1b6d50cSHong Zhang #endif 13188c1d5ab3SHong Zhang } 131926fbe8dcSKarl Rupp 1320b1301b2eSHong Zhang for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr]; 1321c92418ccSAmlan Barua 1322f4259b30SLisandro Dalcin fftw->p_forward = NULL; 1323f4259b30SLisandro Dalcin fftw->p_backward = NULL; 1324b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1325b2b77a04SHong Zhang 1326b2b77a04SHong Zhang if (size == 1) { 1327b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1328b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 13290cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 1330b2b77a04SHong Zhang } else { 1331b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1332b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 13330cf2e031SBarry Smith #endif 1334b2b77a04SHong Zhang } 1335b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1336b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 13374a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 133826fbe8dcSKarl Rupp 13399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW)); 13409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW)); 13419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW)); 1342b2b77a04SHong Zhang 1343b2b77a04SHong Zhang /* get runtime options */ 1344d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat"); 13459566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg)); 13465f80ce2aSJacob Faibussowitsch if (flg) fftw->p_flag = iplans[p_flag]; 1347d0609cedSBarry Smith PetscOptionsEnd(); 13483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1349b2b77a04SHong Zhang } 1350