1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 28 /* MatMult_SeqFFTW performs forward DFT in parallel 29 Input parameter: 30 A - the matrix 31 x - the vector on which FDFT will be performed 32 33 Output parameter: 34 y - vector that stores result of FDFT 35 */ 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatMult_SeqFFTW" 38 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 39 { 40 PetscErrorCode ierr; 41 Mat_FFT *fft = (Mat_FFT*)A->data; 42 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 43 PetscScalar *x_array,*y_array; 44 PetscInt ndim=fft->ndim,*dim=fft->dim; 45 46 PetscFunctionBegin; 47 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 48 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 49 if (!fftw->p_forward) { /* create a plan, then excute it */ 50 switch (ndim) { 51 case 1: 52 #if defined(PETSC_USE_COMPLEX) 53 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54 #else 55 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 56 #endif 57 break; 58 case 2: 59 #if defined(PETSC_USE_COMPLEX) 60 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 61 #else 62 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 63 #endif 64 break; 65 case 3: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 default: 73 #if defined(PETSC_USE_COMPLEX) 74 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 75 /* bug: dim[3] changes from 10 to 0 '--with-64-bit-indices=1' */ 76 #else 77 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 78 #endif 79 break; 80 } 81 fftw->finarray = x_array; 82 fftw->foutarray = y_array; 83 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 84 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 85 fftw_execute(fftw->p_forward); 86 } else { /* use existing plan */ 87 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 88 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 89 } else { 90 fftw_execute(fftw->p_forward); 91 } 92 } 93 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 94 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 95 PetscFunctionReturn(0); 96 } 97 98 /* MatMultTranspose_SeqFFTW performs serial backward DFT 99 Input parameter: 100 A - the matrix 101 x - the vector on which BDFT will be performed 102 103 Output parameter: 104 y - vector that stores result of BDFT 105 */ 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 109 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 110 { 111 PetscErrorCode ierr; 112 Mat_FFT *fft = (Mat_FFT*)A->data; 113 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 114 PetscScalar *x_array,*y_array; 115 PetscInt ndim=fft->ndim,*dim=fft->dim; 116 117 PetscFunctionBegin; 118 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 119 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 120 if (!fftw->p_backward) { /* create a plan, then excute it */ 121 switch (ndim) { 122 case 1: 123 #if defined(PETSC_USE_COMPLEX) 124 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 125 #else 126 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 127 #endif 128 break; 129 case 2: 130 #if defined(PETSC_USE_COMPLEX) 131 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 132 #else 133 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 134 #endif 135 break; 136 case 3: 137 #if defined(PETSC_USE_COMPLEX) 138 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 139 #else 140 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 141 #endif 142 break; 143 default: 144 #if defined(PETSC_USE_COMPLEX) 145 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 146 #else 147 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 148 #endif 149 break; 150 } 151 fftw->binarray = x_array; 152 fftw->boutarray = y_array; 153 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 154 } else { /* use existing plan */ 155 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 156 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 157 } else { 158 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 159 } 160 } 161 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 162 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 /* MatMult_MPIFFTW performs forward DFT in parallel 167 Input parameter: 168 A - the matrix 169 x - the vector on which FDFT will be performed 170 171 Output parameter: 172 y - vector that stores result of FDFT 173 */ 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatMult_MPIFFTW" 176 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 177 { 178 PetscErrorCode ierr; 179 Mat_FFT *fft = (Mat_FFT*)A->data; 180 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 181 PetscScalar *x_array,*y_array; 182 PetscInt ndim=fft->ndim,*dim=fft->dim; 183 MPI_Comm comm; 184 185 PetscFunctionBegin; 186 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 187 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 188 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 189 if (!fftw->p_forward) { /* create a plan, then excute it */ 190 switch (ndim) { 191 case 1: 192 #if defined(PETSC_USE_COMPLEX) 193 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 194 #else 195 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 196 #endif 197 break; 198 case 2: 199 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 200 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 201 #else 202 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 203 #endif 204 break; 205 case 3: 206 #if defined(PETSC_USE_COMPLEX) 207 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 208 #else 209 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 210 #endif 211 break; 212 default: 213 #if defined(PETSC_USE_COMPLEX) 214 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 215 #else 216 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 217 #endif 218 break; 219 } 220 fftw->finarray = x_array; 221 fftw->foutarray = y_array; 222 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 223 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 224 fftw_execute(fftw->p_forward); 225 } else { /* use existing plan */ 226 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 227 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 228 } else { 229 fftw_execute(fftw->p_forward); 230 } 231 } 232 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 233 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 237 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 238 Input parameter: 239 A - the matrix 240 x - the vector on which BDFT will be performed 241 242 Output parameter: 243 y - vector that stores result of BDFT 244 */ 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 247 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 248 { 249 PetscErrorCode ierr; 250 Mat_FFT *fft = (Mat_FFT*)A->data; 251 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 252 PetscScalar *x_array,*y_array; 253 PetscInt ndim=fft->ndim,*dim=fft->dim; 254 MPI_Comm comm; 255 256 PetscFunctionBegin; 257 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 258 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 259 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 260 if (!fftw->p_backward) { /* create a plan, then excute it */ 261 switch (ndim) { 262 case 1: 263 #if defined(PETSC_USE_COMPLEX) 264 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 265 #else 266 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 267 #endif 268 break; 269 case 2: 270 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 271 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 272 #else 273 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 274 #endif 275 break; 276 case 3: 277 #if defined(PETSC_USE_COMPLEX) 278 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 279 #else 280 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 281 #endif 282 break; 283 default: 284 #if defined(PETSC_USE_COMPLEX) 285 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 286 #else 287 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 288 #endif 289 break; 290 } 291 fftw->binarray = x_array; 292 fftw->boutarray = y_array; 293 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 294 } else { /* use existing plan */ 295 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 296 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 297 } else { 298 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 299 } 300 } 301 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 302 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatDestroy_FFTW" 308 PetscErrorCode MatDestroy_FFTW(Mat A) 309 { 310 Mat_FFT *fft = (Mat_FFT*)A->data; 311 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 312 PetscErrorCode ierr; 313 314 PetscFunctionBegin; 315 fftw_destroy_plan(fftw->p_forward); 316 fftw_destroy_plan(fftw->p_backward); 317 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 318 ierr = PetscFree(fft->data);CHKERRQ(ierr); 319 fftw_mpi_cleanup(); 320 PetscFunctionReturn(0); 321 } 322 323 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 324 #undef __FUNCT__ 325 #define __FUNCT__ "VecDestroy_MPIFFTW" 326 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 327 { 328 PetscErrorCode ierr; 329 PetscScalar *array; 330 331 PetscFunctionBegin; 332 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 333 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 334 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 335 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "MatGetVecsFFTW" 341 /*@ 342 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 343 parallel layout determined by FFTW 344 345 Collective on Mat 346 347 Input Parameter: 348 . A - the matrix 349 350 Output Parameter: 351 + x - (optional) input vector of forward FFTW 352 - y - (optional) output vector of forward FFTW 353 - z - (optional) output vector of backward FFTW 354 355 Level: advanced 356 357 Note: The parallel layout of output of forward FFTW is always same as the input 358 of the backward FFTW. But parallel layout of the input vector of forward 359 FFTW might not be same as the output of backward FFTW. 360 Also note that we need to provide enough space while doing parallel real transform. 361 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 362 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 363 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 364 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 365 zeros if it is odd only one zero is needed. 366 Lastly one needs some scratch space at the end of data set in each process. alloc_local 367 figures out how much space is needed, i.e. it figures out the data+scratch space for 368 each processor and returns that. 369 370 .seealso: MatCreateFFTW() 371 @*/ 372 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 373 { 374 PetscErrorCode ierr; 375 376 PetscFunctionBegin; 377 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 378 PetscFunctionReturn(0); 379 } 380 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 383 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 384 { 385 PetscErrorCode ierr; 386 PetscMPIInt size,rank; 387 MPI_Comm comm; 388 Mat_FFT *fft = (Mat_FFT*)A->data; 389 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 390 PetscInt N = fft->N; 391 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 392 393 PetscFunctionBegin; 394 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 395 PetscValidType(A,1); 396 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 397 398 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 399 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 400 if (size == 1) { /* sequential case */ 401 #if defined(PETSC_USE_COMPLEX) 402 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 403 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 404 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 405 #else 406 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 407 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 408 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 409 #endif 410 } else { /* parallel cases */ 411 ptrdiff_t alloc_local,local_n0,local_0_start; 412 ptrdiff_t local_n1; 413 fftw_complex *data_fout; 414 ptrdiff_t local_1_start; 415 #if defined(PETSC_USE_COMPLEX) 416 fftw_complex *data_fin,*data_bout; 417 #else 418 double *data_finr,*data_boutr; 419 PetscInt n1,N1; 420 ptrdiff_t temp; 421 #endif 422 423 switch (ndim) { 424 case 1: 425 #if !defined(PETSC_USE_COMPLEX) 426 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 427 #else 428 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 429 if (fin) { 430 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 431 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 432 433 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 434 } 435 if (fout) { 436 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 437 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 438 439 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 440 } 441 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 442 if (bout) { 443 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 444 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 445 446 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 447 } 448 break; 449 #endif 450 case 2: 451 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 452 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 453 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 454 if (fin) { 455 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 456 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 457 458 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 459 } 460 if (bout) { 461 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 462 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 463 464 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 465 } 466 if (fout) { 467 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 468 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 469 470 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 471 } 472 #else 473 /* Get local size */ 474 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 475 if (fin) { 476 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 477 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 478 479 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 480 } 481 if (fout) { 482 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 483 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 484 485 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 486 } 487 if (bout) { 488 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 489 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 490 491 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 492 } 493 #endif 494 break; 495 case 3: 496 #if !defined(PETSC_USE_COMPLEX) 497 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 498 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 499 if (fin) { 500 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 501 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 502 503 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 504 } 505 if (bout) { 506 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 507 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 508 509 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 510 } 511 512 if (fout) { 513 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 514 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 515 516 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 517 } 518 #else 519 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 520 if (fin) { 521 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 522 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 523 524 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 525 } 526 if (fout) { 527 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 528 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 529 530 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 531 } 532 if (bout) { 533 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 534 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 535 536 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 537 } 538 #endif 539 break; 540 default: 541 #if !defined(PETSC_USE_COMPLEX) 542 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 543 544 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 545 546 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 547 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 548 549 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 550 551 if (fin) { 552 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 553 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 554 555 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 556 } 557 if (bout) { 558 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 559 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 560 561 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 562 } 563 564 if (fout) { 565 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 566 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 567 568 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 569 } 570 #else 571 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 572 if (fin) { 573 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 574 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 575 576 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 577 } 578 if (fout) { 579 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 580 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 581 582 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 583 } 584 if (bout) { 585 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 586 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 587 588 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 589 } 590 #endif 591 break; 592 } 593 } 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "VecScatterPetscToFFTW" 599 /*@ 600 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 601 602 Collective on Mat 603 604 Input Parameters: 605 + A - FFTW matrix 606 - x - the PETSc vector 607 608 Output Parameters: 609 . y - the FFTW vector 610 611 Options Database Keys: 612 . -mat_fftw_plannerflags - set FFTW planner flags 613 614 Level: intermediate 615 616 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 617 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 618 zeros. This routine does that job by scattering operation. 619 620 .seealso: VecScatterFFTWToPetsc() 621 @*/ 622 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 623 { 624 PetscErrorCode ierr; 625 626 PetscFunctionBegin; 627 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 633 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 634 { 635 PetscErrorCode ierr; 636 MPI_Comm comm; 637 Mat_FFT *fft = (Mat_FFT*)A->data; 638 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 639 PetscInt N =fft->N; 640 PetscInt ndim =fft->ndim,*dim=fft->dim; 641 PetscInt low; 642 PetscInt rank,size,vsize,vsize1; 643 ptrdiff_t alloc_local,local_n0,local_0_start; 644 ptrdiff_t local_n1,local_1_start; 645 VecScatter vecscat; 646 IS list1,list2; 647 #if !defined(PETSC_USE_COMPLEX) 648 PetscInt i,j,k,partial_dim; 649 PetscInt *indx1, *indx2, tempindx, tempindx1; 650 PetscInt N1, n1,NM; 651 ptrdiff_t temp; 652 #endif 653 654 PetscFunctionBegin; 655 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 656 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 657 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 658 ierr = VecGetOwnershipRange(y,&low,NULL); 659 660 if (size==1) { 661 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 662 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 663 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 664 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 665 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 666 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 667 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 668 ierr = ISDestroy(&list1);CHKERRQ(ierr); 669 } else { 670 switch (ndim) { 671 case 1: 672 #if defined(PETSC_USE_COMPLEX) 673 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 674 675 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 676 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 677 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 678 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 679 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 680 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 681 ierr = ISDestroy(&list1);CHKERRQ(ierr); 682 ierr = ISDestroy(&list2);CHKERRQ(ierr); 683 #else 684 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 685 #endif 686 break; 687 case 2: 688 #if defined(PETSC_USE_COMPLEX) 689 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 690 691 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 692 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 693 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 694 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 695 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 696 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 697 ierr = ISDestroy(&list1);CHKERRQ(ierr); 698 ierr = ISDestroy(&list2);CHKERRQ(ierr); 699 #else 700 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 701 702 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 703 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 704 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 705 706 if (dim[1]%2==0) { 707 NM = dim[1]+2; 708 } else { 709 NM = dim[1]+1; 710 } 711 for (i=0; i<local_n0; i++) { 712 for (j=0; j<dim[1]; j++) { 713 tempindx = i*dim[1] + j; 714 tempindx1 = i*NM + j; 715 716 indx1[tempindx]=local_0_start*dim[1]+tempindx; 717 indx2[tempindx]=low+tempindx1; 718 } 719 } 720 721 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 722 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 723 724 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 725 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 726 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 728 ierr = ISDestroy(&list1);CHKERRQ(ierr); 729 ierr = ISDestroy(&list2);CHKERRQ(ierr); 730 ierr = PetscFree(indx1);CHKERRQ(ierr); 731 ierr = PetscFree(indx2);CHKERRQ(ierr); 732 #endif 733 break; 734 735 case 3: 736 #if defined(PETSC_USE_COMPLEX) 737 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 738 739 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 740 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 741 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 742 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 743 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 744 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 745 ierr = ISDestroy(&list1);CHKERRQ(ierr); 746 ierr = ISDestroy(&list2);CHKERRQ(ierr); 747 #else 748 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 749 750 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 751 752 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 753 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 754 755 if (dim[2]%2==0) NM = dim[2]+2; 756 else NM = dim[2]+1; 757 758 for (i=0; i<local_n0; i++) { 759 for (j=0; j<dim[1]; j++) { 760 for (k=0; k<dim[2]; k++) { 761 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 762 tempindx1 = i*dim[1]*NM + j*NM + k; 763 764 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 765 indx2[tempindx]=low+tempindx1; 766 } 767 } 768 } 769 770 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 771 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 772 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 773 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 774 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 775 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 776 ierr = ISDestroy(&list1);CHKERRQ(ierr); 777 ierr = ISDestroy(&list2);CHKERRQ(ierr); 778 ierr = PetscFree(indx1);CHKERRQ(ierr); 779 ierr = PetscFree(indx2);CHKERRQ(ierr); 780 #endif 781 break; 782 783 default: 784 #if defined(PETSC_USE_COMPLEX) 785 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 786 787 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 788 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 789 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 790 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 791 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 793 ierr = ISDestroy(&list1);CHKERRQ(ierr); 794 ierr = ISDestroy(&list2);CHKERRQ(ierr); 795 #else 796 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 797 798 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 799 800 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 801 802 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 803 804 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 805 806 partial_dim = fftw->partial_dim; 807 808 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 809 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 810 811 if (dim[ndim-1]%2==0) NM = 2; 812 else NM = 1; 813 814 j = low; 815 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 816 indx1[i] = local_0_start*partial_dim + i; 817 indx2[i] = j; 818 if (k%dim[ndim-1]==0) j+=NM; 819 j++; 820 } 821 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 822 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 823 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 824 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 827 ierr = ISDestroy(&list1);CHKERRQ(ierr); 828 ierr = ISDestroy(&list2);CHKERRQ(ierr); 829 ierr = PetscFree(indx1);CHKERRQ(ierr); 830 ierr = PetscFree(indx2);CHKERRQ(ierr); 831 #endif 832 break; 833 } 834 } 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "VecScatterFFTWToPetsc" 840 /*@ 841 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 842 843 Collective on Mat 844 845 Input Parameters: 846 + A - FFTW matrix 847 - x - FFTW vector 848 849 Output Parameters: 850 . y - PETSc vector 851 852 Level: intermediate 853 854 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 855 VecScatterFFTWToPetsc removes those extra zeros. 856 857 .seealso: VecScatterPetscToFFTW() 858 @*/ 859 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 860 { 861 PetscErrorCode ierr; 862 863 PetscFunctionBegin; 864 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 870 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 871 { 872 PetscErrorCode ierr; 873 MPI_Comm comm; 874 Mat_FFT *fft = (Mat_FFT*)A->data; 875 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 876 PetscInt N = fft->N; 877 PetscInt ndim = fft->ndim,*dim=fft->dim; 878 PetscInt low; 879 PetscInt rank,size; 880 ptrdiff_t alloc_local,local_n0,local_0_start; 881 ptrdiff_t local_n1,local_1_start; 882 VecScatter vecscat; 883 IS list1,list2; 884 #if !defined(PETSC_USE_COMPLEX) 885 PetscInt i,j,k,partial_dim; 886 PetscInt *indx1, *indx2, tempindx, tempindx1; 887 PetscInt N1, n1,NM; 888 ptrdiff_t temp; 889 #endif 890 891 PetscFunctionBegin; 892 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 893 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 894 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 895 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 896 897 if (size==1) { 898 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 899 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 900 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 902 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 903 ierr = ISDestroy(&list1);CHKERRQ(ierr); 904 905 } else { 906 switch (ndim) { 907 case 1: 908 #if defined(PETSC_USE_COMPLEX) 909 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 910 911 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 912 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 913 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 914 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 917 ierr = ISDestroy(&list1);CHKERRQ(ierr); 918 ierr = ISDestroy(&list2);CHKERRQ(ierr); 919 #else 920 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 921 #endif 922 break; 923 case 2: 924 #if defined(PETSC_USE_COMPLEX) 925 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 926 927 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 928 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 929 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 930 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 932 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 933 ierr = ISDestroy(&list1);CHKERRQ(ierr); 934 ierr = ISDestroy(&list2);CHKERRQ(ierr); 935 #else 936 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 937 938 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 939 940 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 941 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 942 943 if (dim[1]%2==0) NM = dim[1]+2; 944 else NM = dim[1]+1; 945 946 for (i=0; i<local_n0; i++) { 947 for (j=0; j<dim[1]; j++) { 948 tempindx = i*dim[1] + j; 949 tempindx1 = i*NM + j; 950 951 indx1[tempindx]=local_0_start*dim[1]+tempindx; 952 indx2[tempindx]=low+tempindx1; 953 } 954 } 955 956 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 957 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 958 959 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 960 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 963 ierr = ISDestroy(&list1);CHKERRQ(ierr); 964 ierr = ISDestroy(&list2);CHKERRQ(ierr); 965 ierr = PetscFree(indx1);CHKERRQ(ierr); 966 ierr = PetscFree(indx2);CHKERRQ(ierr); 967 #endif 968 break; 969 case 3: 970 #if defined(PETSC_USE_COMPLEX) 971 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 972 973 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 974 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 975 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 976 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 978 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 979 ierr = ISDestroy(&list1);CHKERRQ(ierr); 980 ierr = ISDestroy(&list2);CHKERRQ(ierr); 981 #else 982 983 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 984 985 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 986 987 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 988 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 989 990 if (dim[2]%2==0) NM = dim[2]+2; 991 else NM = dim[2]+1; 992 993 for (i=0; i<local_n0; i++) { 994 for (j=0; j<dim[1]; j++) { 995 for (k=0; k<dim[2]; k++) { 996 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 997 tempindx1 = i*dim[1]*NM + j*NM + k; 998 999 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1000 indx2[tempindx]=low+tempindx1; 1001 } 1002 } 1003 } 1004 1005 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1006 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1007 1008 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1009 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1012 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1013 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1014 ierr = PetscFree(indx1);CHKERRQ(ierr); 1015 ierr = PetscFree(indx2);CHKERRQ(ierr); 1016 #endif 1017 break; 1018 default: 1019 #if defined(PETSC_USE_COMPLEX) 1020 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1021 1022 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1023 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1024 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1025 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1028 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1029 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1030 #else 1031 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1032 1033 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1034 1035 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1036 1037 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1038 1039 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1040 1041 partial_dim = fftw->partial_dim; 1042 1043 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1044 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1045 1046 if (dim[ndim-1]%2==0) NM = 2; 1047 else NM = 1; 1048 1049 j = low; 1050 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1051 indx1[i] = local_0_start*partial_dim + i; 1052 indx2[i] = j; 1053 if (k%dim[ndim-1]==0) j+=NM; 1054 j++; 1055 } 1056 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1057 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1058 1059 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1060 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1062 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1063 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1064 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1065 ierr = PetscFree(indx1);CHKERRQ(ierr); 1066 ierr = PetscFree(indx2);CHKERRQ(ierr); 1067 #endif 1068 break; 1069 } 1070 } 1071 PetscFunctionReturn(0); 1072 } 1073 1074 #undef __FUNCT__ 1075 #define __FUNCT__ "MatCreate_FFTW" 1076 /* 1077 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1078 1079 Options Database Keys: 1080 + -mat_fftw_plannerflags - set FFTW planner flags 1081 1082 Level: intermediate 1083 1084 */ 1085 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1086 { 1087 PetscErrorCode ierr; 1088 MPI_Comm comm; 1089 Mat_FFT *fft=(Mat_FFT*)A->data; 1090 Mat_FFTW *fftw; 1091 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1092 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1093 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1094 PetscBool flg; 1095 PetscInt p_flag,partial_dim=1,ctr; 1096 PetscMPIInt size,rank; 1097 ptrdiff_t *pdim; 1098 ptrdiff_t local_n1,local_1_start; 1099 #if !defined(PETSC_USE_COMPLEX) 1100 ptrdiff_t temp; 1101 PetscInt N1,tot_dim; 1102 #else 1103 PetscInt n1; 1104 #endif 1105 1106 PetscFunctionBegin; 1107 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1108 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1109 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1110 1111 fftw_mpi_init(); 1112 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1113 pdim[0] = dim[0]; 1114 #if !defined(PETSC_USE_COMPLEX) 1115 tot_dim = 2*dim[0]; 1116 #endif 1117 for (ctr=1; ctr<ndim; ctr++) { 1118 partial_dim *= dim[ctr]; 1119 pdim[ctr] = dim[ctr]; 1120 #if !defined(PETSC_USE_COMPLEX) 1121 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1122 else tot_dim *= dim[ctr]; 1123 #endif 1124 } 1125 1126 if (size == 1) { 1127 #if defined(PETSC_USE_COMPLEX) 1128 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1129 n = N; 1130 #else 1131 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1132 n = tot_dim; 1133 #endif 1134 1135 } else { 1136 ptrdiff_t alloc_local,local_n0,local_0_start; 1137 switch (ndim) { 1138 case 1: 1139 #if !defined(PETSC_USE_COMPLEX) 1140 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1141 #else 1142 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1143 1144 n = (PetscInt)local_n0; 1145 n1 = (PetscInt)local_n1; 1146 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1147 #endif 1148 break; 1149 case 2: 1150 #if defined(PETSC_USE_COMPLEX) 1151 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1152 /* 1153 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1154 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1155 */ 1156 n = (PetscInt)local_n0*dim[1]; 1157 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1158 #else 1159 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1160 1161 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1162 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1163 #endif 1164 break; 1165 case 3: 1166 #if defined(PETSC_USE_COMPLEX) 1167 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1168 1169 n = (PetscInt)local_n0*dim[1]*dim[2]; 1170 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1171 #else 1172 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1173 1174 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1175 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1176 #endif 1177 break; 1178 default: 1179 #if defined(PETSC_USE_COMPLEX) 1180 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1181 1182 n = (PetscInt)local_n0*partial_dim; 1183 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1184 #else 1185 temp = pdim[ndim-1]; 1186 1187 pdim[ndim-1] = temp/2 + 1; 1188 1189 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1190 1191 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1192 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1193 1194 pdim[ndim-1] = temp; 1195 1196 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1197 #endif 1198 break; 1199 } 1200 } 1201 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1202 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1203 fft->data = (void*)fftw; 1204 1205 fft->n = n; 1206 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1207 fftw->partial_dim = partial_dim; 1208 1209 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1210 1211 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1212 1213 fftw->p_forward = 0; 1214 fftw->p_backward = 0; 1215 fftw->p_flag = FFTW_ESTIMATE; 1216 1217 if (size == 1) { 1218 A->ops->mult = MatMult_SeqFFTW; 1219 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1220 } else { 1221 A->ops->mult = MatMult_MPIFFTW; 1222 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1223 } 1224 fft->matdestroy = MatDestroy_FFTW; 1225 A->assembled = PETSC_TRUE; 1226 A->preallocated = PETSC_TRUE; 1227 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1231 1232 /* get runtime options */ 1233 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1234 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1235 if (flg) { 1236 fftw->p_flag = iplans[p_flag]; 1237 } 1238 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 1243 1244 1245