1*b2b77a04SHong Zhang #define PETSCMAT_DLL 2*b2b77a04SHong Zhang 3*b2b77a04SHong Zhang /* 4*b2b77a04SHong Zhang Provides an interface to the FFTW package. 5*b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 6*b2b77a04SHong Zhang */ 7*b2b77a04SHong Zhang 8*b2b77a04SHong Zhang #include "../src/mat/impls/fft/fft.h" /*I "petscmat.h" I*/ 9*b2b77a04SHong Zhang EXTERN_C_BEGIN 10*b2b77a04SHong Zhang #include "fftw3-mpi.h" 11*b2b77a04SHong Zhang EXTERN_C_END 12*b2b77a04SHong Zhang 13*b2b77a04SHong Zhang typedef struct { 14*b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 15*b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 16*b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 17*b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 18*b2b77a04SHong Zhang } Mat_FFTW; 19*b2b77a04SHong Zhang 20*b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 21*b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 22*b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 23*b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 24*b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 25*b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 26*b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 27*b2b77a04SHong Zhang 28*b2b77a04SHong Zhang #undef __FUNCT__ 29*b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 30*b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 31*b2b77a04SHong Zhang { 32*b2b77a04SHong Zhang PetscErrorCode ierr; 33*b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 34*b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 35*b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 36*b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 37*b2b77a04SHong Zhang 38*b2b77a04SHong Zhang PetscFunctionBegin; 39*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 40*b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 41*b2b77a04SHong Zhang #endif 42*b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 43*b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 44*b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 45*b2b77a04SHong Zhang switch (ndim){ 46*b2b77a04SHong Zhang case 1: 47*b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 48*b2b77a04SHong Zhang break; 49*b2b77a04SHong Zhang case 2: 50*b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 51*b2b77a04SHong Zhang break; 52*b2b77a04SHong Zhang case 3: 53*b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54*b2b77a04SHong Zhang break; 55*b2b77a04SHong Zhang default: 56*b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 57*b2b77a04SHong Zhang break; 58*b2b77a04SHong Zhang } 59*b2b77a04SHong Zhang fftw->finarray = x_array; 60*b2b77a04SHong Zhang fftw->foutarray = y_array; 61*b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 62*b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 63*b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 64*b2b77a04SHong Zhang } else { /* use existing plan */ 65*b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 66*b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 67*b2b77a04SHong Zhang } else { 68*b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 69*b2b77a04SHong Zhang } 70*b2b77a04SHong Zhang } 71*b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 72*b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 73*b2b77a04SHong Zhang PetscFunctionReturn(0); 74*b2b77a04SHong Zhang } 75*b2b77a04SHong Zhang 76*b2b77a04SHong Zhang #undef __FUNCT__ 77*b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 78*b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 79*b2b77a04SHong Zhang { 80*b2b77a04SHong Zhang PetscErrorCode ierr; 81*b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 82*b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 83*b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 84*b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 85*b2b77a04SHong Zhang 86*b2b77a04SHong Zhang PetscFunctionBegin; 87*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 88*b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 89*b2b77a04SHong Zhang #endif 90*b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 91*b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 92*b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 93*b2b77a04SHong Zhang switch (ndim){ 94*b2b77a04SHong Zhang case 1: 95*b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 96*b2b77a04SHong Zhang break; 97*b2b77a04SHong Zhang case 2: 98*b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 99*b2b77a04SHong Zhang break; 100*b2b77a04SHong Zhang case 3: 101*b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 102*b2b77a04SHong Zhang break; 103*b2b77a04SHong Zhang default: 104*b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 105*b2b77a04SHong Zhang break; 106*b2b77a04SHong Zhang } 107*b2b77a04SHong Zhang fftw->binarray = x_array; 108*b2b77a04SHong Zhang fftw->boutarray = y_array; 109*b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 110*b2b77a04SHong Zhang } else { /* use existing plan */ 111*b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 112*b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 113*b2b77a04SHong Zhang } else { 114*b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 115*b2b77a04SHong Zhang } 116*b2b77a04SHong Zhang } 117*b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 118*b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 119*b2b77a04SHong Zhang PetscFunctionReturn(0); 120*b2b77a04SHong Zhang } 121*b2b77a04SHong Zhang 122*b2b77a04SHong Zhang #undef __FUNCT__ 123*b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 124*b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 125*b2b77a04SHong Zhang { 126*b2b77a04SHong Zhang PetscErrorCode ierr; 127*b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 128*b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 129*b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 130*b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 131*b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 132*b2b77a04SHong Zhang 133*b2b77a04SHong Zhang PetscFunctionBegin; 134*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 135*b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 136*b2b77a04SHong Zhang #endif 137*b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 138*b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 139*b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 140*b2b77a04SHong Zhang switch (ndim){ 141*b2b77a04SHong Zhang case 1: 142*b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 143*b2b77a04SHong Zhang break; 144*b2b77a04SHong Zhang case 2: 145*b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 146*b2b77a04SHong Zhang break; 147*b2b77a04SHong Zhang case 3: 148*b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 149*b2b77a04SHong Zhang break; 150*b2b77a04SHong Zhang default: 151*b2b77a04SHong Zhang /* 152*b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 153*b2b77a04SHong Zhang */ 154*b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not supported yet"); 155*b2b77a04SHong Zhang break; 156*b2b77a04SHong Zhang } 157*b2b77a04SHong Zhang fftw->finarray = x_array; 158*b2b77a04SHong Zhang fftw->foutarray = y_array; 159*b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 160*b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 161*b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 162*b2b77a04SHong Zhang } else { /* use existing plan */ 163*b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 164*b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 165*b2b77a04SHong Zhang } else { 166*b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 167*b2b77a04SHong Zhang } 168*b2b77a04SHong Zhang } 169*b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 170*b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 171*b2b77a04SHong Zhang PetscFunctionReturn(0); 172*b2b77a04SHong Zhang } 173*b2b77a04SHong Zhang 174*b2b77a04SHong Zhang #undef __FUNCT__ 175*b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 176*b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 177*b2b77a04SHong Zhang { 178*b2b77a04SHong Zhang PetscErrorCode ierr; 179*b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 180*b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 181*b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 182*b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 183*b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 184*b2b77a04SHong Zhang 185*b2b77a04SHong Zhang PetscFunctionBegin; 186*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 187*b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 188*b2b77a04SHong Zhang #endif 189*b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 190*b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 191*b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 192*b2b77a04SHong Zhang switch (ndim){ 193*b2b77a04SHong Zhang case 1: 194*b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 195*b2b77a04SHong Zhang break; 196*b2b77a04SHong Zhang case 2: 197*b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 198*b2b77a04SHong Zhang break; 199*b2b77a04SHong Zhang case 3: 200*b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 201*b2b77a04SHong Zhang break; 202*b2b77a04SHong Zhang default: 203*b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not supported yet"); 204*b2b77a04SHong Zhang /* fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); */ 205*b2b77a04SHong Zhang break; 206*b2b77a04SHong Zhang } 207*b2b77a04SHong Zhang fftw->binarray = x_array; 208*b2b77a04SHong Zhang fftw->boutarray = y_array; 209*b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 210*b2b77a04SHong Zhang } else { /* use existing plan */ 211*b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 212*b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 213*b2b77a04SHong Zhang } else { 214*b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 215*b2b77a04SHong Zhang } 216*b2b77a04SHong Zhang } 217*b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 218*b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 219*b2b77a04SHong Zhang PetscFunctionReturn(0); 220*b2b77a04SHong Zhang } 221*b2b77a04SHong Zhang 222*b2b77a04SHong Zhang #undef __FUNCT__ 223*b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 224*b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 225*b2b77a04SHong Zhang { 226*b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 227*b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 228*b2b77a04SHong Zhang PetscErrorCode ierr; 229*b2b77a04SHong Zhang 230*b2b77a04SHong Zhang PetscFunctionBegin; 231*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 232*b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 233*b2b77a04SHong Zhang #endif 234*b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 235*b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 236*b2b77a04SHong Zhang ierr = PetscFree(fftw);CHKERRQ(ierr); 237*b2b77a04SHong Zhang PetscFunctionReturn(0); 238*b2b77a04SHong Zhang } 239*b2b77a04SHong Zhang 240*b2b77a04SHong Zhang #include "../src/vec/vec/impls/mpi/pvecimpl.h" /*I "petscvec.h" I*/ 241*b2b77a04SHong Zhang #undef __FUNCT__ 242*b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 243*b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 244*b2b77a04SHong Zhang { 245*b2b77a04SHong Zhang PetscErrorCode ierr; 246*b2b77a04SHong Zhang PetscScalar *array; 247*b2b77a04SHong Zhang 248*b2b77a04SHong Zhang PetscFunctionBegin; 249*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 250*b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 251*b2b77a04SHong Zhang #endif 252*b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 253*b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 254*b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 255*b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 256*b2b77a04SHong Zhang PetscFunctionReturn(0); 257*b2b77a04SHong Zhang } 258*b2b77a04SHong Zhang 259*b2b77a04SHong Zhang #undef __FUNCT__ 260*b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 261*b2b77a04SHong Zhang /* 262*b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 263*b2b77a04SHong Zhang parallel layout determined by FFTW 264*b2b77a04SHong Zhang 265*b2b77a04SHong Zhang Collective on Mat 266*b2b77a04SHong Zhang 267*b2b77a04SHong Zhang Input Parameter: 268*b2b77a04SHong Zhang . mat - the matrix 269*b2b77a04SHong Zhang 270*b2b77a04SHong Zhang Output Parameter: 271*b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 272*b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 273*b2b77a04SHong Zhang 274*b2b77a04SHong Zhang Level: advanced 275*b2b77a04SHong Zhang 276*b2b77a04SHong Zhang .seealso: MatCreateFFTW() 277*b2b77a04SHong Zhang */ 278*b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 279*b2b77a04SHong Zhang { 280*b2b77a04SHong Zhang PetscErrorCode ierr; 281*b2b77a04SHong Zhang PetscMPIInt size,rank; 282*b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 283*b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 284*b2b77a04SHong Zhang PetscInt N=fft->N; 285*b2b77a04SHong Zhang 286*b2b77a04SHong Zhang PetscFunctionBegin; 287*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 288*b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 289*b2b77a04SHong Zhang #endif 290*b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 291*b2b77a04SHong Zhang PetscValidType(A,1); 292*b2b77a04SHong Zhang 293*b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 294*b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 295*b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 296*b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 297*b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 298*b2b77a04SHong Zhang } else { /* mpi case */ 299*b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 300*b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 301*b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 302*b2b77a04SHong Zhang 303*b2b77a04SHong Zhang switch (ndim){ 304*b2b77a04SHong Zhang case 1: 305*b2b77a04SHong Zhang SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Not supported yet"); 306*b2b77a04SHong Zhang break; 307*b2b77a04SHong Zhang case 2: 308*b2b77a04SHong Zhang /* Get local size */ 309*b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 310*b2b77a04SHong Zhang if (fin) { 311*b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 312*b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 313*b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 314*b2b77a04SHong Zhang } 315*b2b77a04SHong Zhang if (fout) { 316*b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 317*b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 318*b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 319*b2b77a04SHong Zhang } 320*b2b77a04SHong Zhang break; 321*b2b77a04SHong Zhang case 3: 322*b2b77a04SHong Zhang SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Not supported yet"); 323*b2b77a04SHong Zhang break; 324*b2b77a04SHong Zhang default: 325*b2b77a04SHong Zhang SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Not supported yet"); 326*b2b77a04SHong Zhang break; 327*b2b77a04SHong Zhang } 328*b2b77a04SHong Zhang } 329*b2b77a04SHong Zhang PetscFunctionReturn(0); 330*b2b77a04SHong Zhang } 331*b2b77a04SHong Zhang 332*b2b77a04SHong Zhang EXTERN_C_BEGIN 333*b2b77a04SHong Zhang #undef __FUNCT__ 334*b2b77a04SHong Zhang #define __FUNCT__ "MatCreate_FFTW" 335*b2b77a04SHong Zhang /* 336*b2b77a04SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT 337*b2b77a04SHong Zhang via the external package FFTW 338*b2b77a04SHong Zhang 339*b2b77a04SHong Zhang Options Database Keys: 340*b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 341*b2b77a04SHong Zhang 342*b2b77a04SHong Zhang Level: intermediate 343*b2b77a04SHong Zhang 344*b2b77a04SHong Zhang */ 345*b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 346*b2b77a04SHong Zhang { 347*b2b77a04SHong Zhang PetscErrorCode ierr; 348*b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 349*b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 350*b2b77a04SHong Zhang Mat_FFTW *fftw; 351*b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 352*b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 353*b2b77a04SHong Zhang PetscBool flg; 354*b2b77a04SHong Zhang PetscInt p_flag; 355*b2b77a04SHong Zhang PetscMPIInt size; 356*b2b77a04SHong Zhang 357*b2b77a04SHong Zhang PetscFunctionBegin; 358*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 359*b2b77a04SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 360*b2b77a04SHong Zhang #endif 361*b2b77a04SHong Zhang 362*b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 363*b2b77a04SHong Zhang //ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 364*b2b77a04SHong Zhang 365*b2b77a04SHong Zhang 366*b2b77a04SHong Zhang if (size == 1) { 367*b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 368*b2b77a04SHong Zhang n = N; 369*b2b77a04SHong Zhang } else { 370*b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 371*b2b77a04SHong Zhang switch (ndim){ 372*b2b77a04SHong Zhang case 1: 373*b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not implemented yet"); 374*b2b77a04SHong Zhang break; 375*b2b77a04SHong Zhang case 2: 376*b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 377*b2b77a04SHong Zhang /* 378*b2b77a04SHong Zhang PetscMPIInt rank; 379*b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 380*b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 381*b2b77a04SHong Zhang */ 382*b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 383*b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 384*b2b77a04SHong Zhang break; 385*b2b77a04SHong Zhang case 3: 386*b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not implemented yet"); 387*b2b77a04SHong Zhang break; 388*b2b77a04SHong Zhang default: 389*b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not implemented yet"); 390*b2b77a04SHong Zhang break; 391*b2b77a04SHong Zhang } 392*b2b77a04SHong Zhang } 393*b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 394*b2b77a04SHong Zhang 395*b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 396*b2b77a04SHong Zhang fft->data = (void*)fftw; 397*b2b77a04SHong Zhang 398*b2b77a04SHong Zhang fft->n = n; 399*b2b77a04SHong Zhang fftw->p_forward = 0; 400*b2b77a04SHong Zhang fftw->p_backward = 0; 401*b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 402*b2b77a04SHong Zhang 403*b2b77a04SHong Zhang if (size == 1){ 404*b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 405*b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 406*b2b77a04SHong Zhang } else { 407*b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 408*b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 409*b2b77a04SHong Zhang } 410*b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 411*b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 412*b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 413*b2b77a04SHong Zhang 414*b2b77a04SHong Zhang /* get runtime options */ 415*b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 416*b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 417*b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 418*b2b77a04SHong Zhang PetscOptionsEnd(); 419*b2b77a04SHong Zhang PetscFunctionReturn(0); 420*b2b77a04SHong Zhang } 421*b2b77a04SHong Zhang EXTERN_C_END 422