1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 #if defined(PETSC_USE_64BIT_INDICES) 15 fftw_iodim64 *iodims; 16 #else 17 fftw_iodim *iodims; 18 #endif 19 PetscInt partial_dim; 20 fftw_plan p_forward,p_backward; 21 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23 executed for the arrays with which the plan was created */ 24 } Mat_FFTW; 25 26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30 extern PetscErrorCode MatDestroy_FFTW(Mat); 31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32 33 /* MatMult_SeqFFTW performs forward DFT in parallel 34 Input parameter: 35 A - the matrix 36 x - the vector on which FDFT will be performed 37 38 Output parameter: 39 y - vector that stores result of FDFT 40 */ 41 #undef __FUNCT__ 42 #define __FUNCT__ "MatMult_SeqFFTW" 43 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 44 { 45 PetscErrorCode ierr; 46 Mat_FFT *fft = (Mat_FFT*)A->data; 47 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 48 PetscScalar *x_array,*y_array; 49 #if defined(PETSC_USE_COMPLEX) 50 #if defined(PETSC_USE_64BIT_INDICES) 51 fftw_iodim64 *iodims; 52 #else 53 fftw_iodim *iodims; 54 #endif 55 PetscInt i; 56 #endif 57 PetscInt ndim = fft->ndim,*dim = fft->dim; 58 59 PetscFunctionBegin; 60 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 61 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 62 if (!fftw->p_forward) { /* create a plan, then excute it */ 63 switch (ndim) { 64 case 1: 65 #if defined(PETSC_USE_COMPLEX) 66 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 67 #else 68 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 69 #endif 70 break; 71 case 2: 72 #if defined(PETSC_USE_COMPLEX) 73 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 74 #else 75 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 76 #endif 77 break; 78 case 3: 79 #if defined(PETSC_USE_COMPLEX) 80 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 81 #else 82 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 83 #endif 84 break; 85 default: 86 #if defined(PETSC_USE_COMPLEX) 87 iodims = fftw->iodims; 88 #if defined(PETSC_USE_64BIT_INDICES) 89 if (ndim) { 90 iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 91 iodims[ndim-1].is = iodims[ndim-1].os = 1; 92 for (i=ndim-2; i>=0; --i) { 93 iodims[i].n = (ptrdiff_t)dim[i]; 94 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 95 } 96 } 97 fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 98 #else 99 if (ndim) { 100 iodims[ndim-1].n = (int)dim[ndim-1]; 101 iodims[ndim-1].is = iodims[ndim-1].os = 1; 102 for (i=ndim-2; i>=0; --i) { 103 iodims[i].n = (int)dim[i]; 104 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 105 } 106 } 107 fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 108 #endif 109 110 #else 111 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 112 #endif 113 break; 114 } 115 fftw->finarray = x_array; 116 fftw->foutarray = y_array; 117 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 118 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 119 fftw_execute(fftw->p_forward); 120 } else { /* use existing plan */ 121 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 122 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 123 } else { 124 fftw_execute(fftw->p_forward); 125 } 126 } 127 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 128 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 /* MatMultTranspose_SeqFFTW performs serial backward DFT 133 Input parameter: 134 A - the matrix 135 x - the vector on which BDFT will be performed 136 137 Output parameter: 138 y - vector that stores result of BDFT 139 */ 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 143 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 144 { 145 PetscErrorCode ierr; 146 Mat_FFT *fft = (Mat_FFT*)A->data; 147 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 148 PetscScalar *x_array,*y_array; 149 PetscInt ndim=fft->ndim,*dim=fft->dim; 150 #if defined(PETSC_USE_COMPLEX) 151 #if defined(PETSC_USE_64BIT_INDICES) 152 fftw_iodim64 *iodims=fftw->iodims; 153 #else 154 fftw_iodim *iodims=fftw->iodims; 155 #endif 156 #endif 157 158 PetscFunctionBegin; 159 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 160 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 161 if (!fftw->p_backward) { /* create a plan, then excute it */ 162 switch (ndim) { 163 case 1: 164 #if defined(PETSC_USE_COMPLEX) 165 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 166 #else 167 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 168 #endif 169 break; 170 case 2: 171 #if defined(PETSC_USE_COMPLEX) 172 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 173 #else 174 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 175 #endif 176 break; 177 case 3: 178 #if defined(PETSC_USE_COMPLEX) 179 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 180 #else 181 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 182 #endif 183 break; 184 default: 185 #if defined(PETSC_USE_COMPLEX) 186 #if defined(PETSC_USE_64BIT_INDICES) 187 fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 188 #else 189 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 190 #endif 191 #else 192 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 193 #endif 194 break; 195 } 196 fftw->binarray = x_array; 197 fftw->boutarray = y_array; 198 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 199 } else { /* use existing plan */ 200 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 201 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202 } else { 203 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 204 } 205 } 206 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 /* MatMult_MPIFFTW performs forward DFT in parallel 212 Input parameter: 213 A - the matrix 214 x - the vector on which FDFT will be performed 215 216 Output parameter: 217 y - vector that stores result of FDFT 218 */ 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatMult_MPIFFTW" 221 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 222 { 223 PetscErrorCode ierr; 224 Mat_FFT *fft = (Mat_FFT*)A->data; 225 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 226 PetscScalar *x_array,*y_array; 227 PetscInt ndim=fft->ndim,*dim=fft->dim; 228 MPI_Comm comm; 229 230 PetscFunctionBegin; 231 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 232 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 233 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 234 if (!fftw->p_forward) { /* create a plan, then excute it */ 235 switch (ndim) { 236 case 1: 237 #if defined(PETSC_USE_COMPLEX) 238 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 239 #else 240 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 241 #endif 242 break; 243 case 2: 244 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 245 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 246 #else 247 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 248 #endif 249 break; 250 case 3: 251 #if defined(PETSC_USE_COMPLEX) 252 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 253 #else 254 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 255 #endif 256 break; 257 default: 258 #if defined(PETSC_USE_COMPLEX) 259 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 260 #else 261 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 262 #endif 263 break; 264 } 265 fftw->finarray = x_array; 266 fftw->foutarray = y_array; 267 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 268 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 269 fftw_execute(fftw->p_forward); 270 } else { /* use existing plan */ 271 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 272 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 273 } else { 274 fftw_execute(fftw->p_forward); 275 } 276 } 277 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 278 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 283 Input parameter: 284 A - the matrix 285 x - the vector on which BDFT will be performed 286 287 Output parameter: 288 y - vector that stores result of BDFT 289 */ 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 292 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 293 { 294 PetscErrorCode ierr; 295 Mat_FFT *fft = (Mat_FFT*)A->data; 296 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 297 PetscScalar *x_array,*y_array; 298 PetscInt ndim=fft->ndim,*dim=fft->dim; 299 MPI_Comm comm; 300 301 PetscFunctionBegin; 302 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 303 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 304 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 305 if (!fftw->p_backward) { /* create a plan, then excute it */ 306 switch (ndim) { 307 case 1: 308 #if defined(PETSC_USE_COMPLEX) 309 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 310 #else 311 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 312 #endif 313 break; 314 case 2: 315 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 316 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 317 #else 318 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 319 #endif 320 break; 321 case 3: 322 #if defined(PETSC_USE_COMPLEX) 323 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 324 #else 325 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 326 #endif 327 break; 328 default: 329 #if defined(PETSC_USE_COMPLEX) 330 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 331 #else 332 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 333 #endif 334 break; 335 } 336 fftw->binarray = x_array; 337 fftw->boutarray = y_array; 338 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 339 } else { /* use existing plan */ 340 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 341 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 342 } else { 343 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 344 } 345 } 346 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 347 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 #undef __FUNCT__ 352 #define __FUNCT__ "MatDestroy_FFTW" 353 PetscErrorCode MatDestroy_FFTW(Mat A) 354 { 355 Mat_FFT *fft = (Mat_FFT*)A->data; 356 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 357 PetscErrorCode ierr; 358 359 PetscFunctionBegin; 360 fftw_destroy_plan(fftw->p_forward); 361 fftw_destroy_plan(fftw->p_backward); 362 if (fftw->iodims) { 363 free(fftw->iodims); 364 } 365 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 366 ierr = PetscFree(fft->data);CHKERRQ(ierr); 367 fftw_mpi_cleanup(); 368 PetscFunctionReturn(0); 369 } 370 371 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 372 #undef __FUNCT__ 373 #define __FUNCT__ "VecDestroy_MPIFFTW" 374 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 375 { 376 PetscErrorCode ierr; 377 PetscScalar *array; 378 379 PetscFunctionBegin; 380 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 381 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 382 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 383 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 384 PetscFunctionReturn(0); 385 } 386 387 #undef __FUNCT__ 388 #define __FUNCT__ "MatGetVecsFFTW" 389 /*@ 390 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 391 parallel layout determined by FFTW 392 393 Collective on Mat 394 395 Input Parameter: 396 . A - the matrix 397 398 Output Parameter: 399 + x - (optional) input vector of forward FFTW 400 - y - (optional) output vector of forward FFTW 401 - z - (optional) output vector of backward FFTW 402 403 Level: advanced 404 405 Note: The parallel layout of output of forward FFTW is always same as the input 406 of the backward FFTW. But parallel layout of the input vector of forward 407 FFTW might not be same as the output of backward FFTW. 408 Also note that we need to provide enough space while doing parallel real transform. 409 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 410 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 411 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 412 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 413 zeros if it is odd only one zero is needed. 414 Lastly one needs some scratch space at the end of data set in each process. alloc_local 415 figures out how much space is needed, i.e. it figures out the data+scratch space for 416 each processor and returns that. 417 418 .seealso: MatCreateFFTW() 419 @*/ 420 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 421 { 422 PetscErrorCode ierr; 423 424 PetscFunctionBegin; 425 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 431 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 432 { 433 PetscErrorCode ierr; 434 PetscMPIInt size,rank; 435 MPI_Comm comm; 436 Mat_FFT *fft = (Mat_FFT*)A->data; 437 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 438 PetscInt N = fft->N; 439 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 440 441 PetscFunctionBegin; 442 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 443 PetscValidType(A,1); 444 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 445 446 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 447 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 448 if (size == 1) { /* sequential case */ 449 #if defined(PETSC_USE_COMPLEX) 450 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 451 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 452 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 453 #else 454 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 455 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 456 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 457 #endif 458 } else { /* parallel cases */ 459 ptrdiff_t alloc_local,local_n0,local_0_start; 460 ptrdiff_t local_n1; 461 fftw_complex *data_fout; 462 ptrdiff_t local_1_start; 463 #if defined(PETSC_USE_COMPLEX) 464 fftw_complex *data_fin,*data_bout; 465 #else 466 double *data_finr,*data_boutr; 467 PetscInt n1,N1; 468 ptrdiff_t temp; 469 #endif 470 471 switch (ndim) { 472 case 1: 473 #if !defined(PETSC_USE_COMPLEX) 474 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 475 #else 476 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 477 if (fin) { 478 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 479 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 480 481 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 482 } 483 if (fout) { 484 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 485 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 486 487 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 488 } 489 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 490 if (bout) { 491 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 492 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 493 494 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 495 } 496 break; 497 #endif 498 case 2: 499 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 500 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 501 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 502 if (fin) { 503 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 504 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 505 506 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 507 } 508 if (bout) { 509 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 510 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 511 512 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 513 } 514 if (fout) { 515 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 516 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 517 518 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 519 } 520 #else 521 /* Get local size */ 522 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 523 if (fin) { 524 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 525 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 526 527 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 528 } 529 if (fout) { 530 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 531 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 532 533 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 534 } 535 if (bout) { 536 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 537 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 538 539 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 540 } 541 #endif 542 break; 543 case 3: 544 #if !defined(PETSC_USE_COMPLEX) 545 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 546 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 547 if (fin) { 548 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 549 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 550 551 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 if (bout) { 554 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 555 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 556 557 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 558 } 559 560 if (fout) { 561 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 562 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 563 564 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 565 } 566 #else 567 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 568 if (fin) { 569 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 570 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 571 572 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 573 } 574 if (fout) { 575 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 576 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 577 578 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 579 } 580 if (bout) { 581 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 582 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 583 584 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 585 } 586 #endif 587 break; 588 default: 589 #if !defined(PETSC_USE_COMPLEX) 590 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 591 592 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 593 594 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 595 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 596 597 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 598 599 if (fin) { 600 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 601 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 602 603 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 604 } 605 if (bout) { 606 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 607 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 608 609 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 610 } 611 612 if (fout) { 613 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 614 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 615 616 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 617 } 618 #else 619 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 620 if (fin) { 621 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 622 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 623 624 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 625 } 626 if (fout) { 627 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 628 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 629 630 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 631 } 632 if (bout) { 633 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 634 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 635 636 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 637 } 638 #endif 639 break; 640 } 641 } 642 PetscFunctionReturn(0); 643 } 644 645 #undef __FUNCT__ 646 #define __FUNCT__ "VecScatterPetscToFFTW" 647 /*@ 648 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 649 650 Collective on Mat 651 652 Input Parameters: 653 + A - FFTW matrix 654 - x - the PETSc vector 655 656 Output Parameters: 657 . y - the FFTW vector 658 659 Options Database Keys: 660 . -mat_fftw_plannerflags - set FFTW planner flags 661 662 Level: intermediate 663 664 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 665 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 666 zeros. This routine does that job by scattering operation. 667 668 .seealso: VecScatterFFTWToPetsc() 669 @*/ 670 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 671 { 672 PetscErrorCode ierr; 673 674 PetscFunctionBegin; 675 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 681 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 682 { 683 PetscErrorCode ierr; 684 MPI_Comm comm; 685 Mat_FFT *fft = (Mat_FFT*)A->data; 686 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 687 PetscInt N =fft->N; 688 PetscInt ndim =fft->ndim,*dim=fft->dim; 689 PetscInt low; 690 PetscMPIInt rank,size; 691 PetscInt vsize,vsize1; 692 ptrdiff_t local_n0,local_0_start; 693 ptrdiff_t local_n1,local_1_start; 694 VecScatter vecscat; 695 IS list1,list2; 696 #if !defined(PETSC_USE_COMPLEX) 697 PetscInt i,j,k,partial_dim; 698 PetscInt *indx1, *indx2, tempindx, tempindx1; 699 PetscInt N1,n1,NM; 700 ptrdiff_t temp; 701 #endif 702 703 PetscFunctionBegin; 704 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 705 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 706 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 707 ierr = VecGetOwnershipRange(y,&low,NULL); 708 709 if (size==1) { 710 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 711 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 712 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 713 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 714 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 715 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 716 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 717 ierr = ISDestroy(&list1);CHKERRQ(ierr); 718 } else { 719 switch (ndim) { 720 case 1: 721 #if defined(PETSC_USE_COMPLEX) 722 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 723 724 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 725 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 726 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 727 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 728 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 729 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 730 ierr = ISDestroy(&list1);CHKERRQ(ierr); 731 ierr = ISDestroy(&list2);CHKERRQ(ierr); 732 #else 733 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 734 #endif 735 break; 736 case 2: 737 #if defined(PETSC_USE_COMPLEX) 738 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 739 740 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 741 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 742 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 743 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 744 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 745 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 746 ierr = ISDestroy(&list1);CHKERRQ(ierr); 747 ierr = ISDestroy(&list2);CHKERRQ(ierr); 748 #else 749 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 750 751 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 752 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 753 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 754 755 if (dim[1]%2==0) { 756 NM = dim[1]+2; 757 } else { 758 NM = dim[1]+1; 759 } 760 for (i=0; i<local_n0; i++) { 761 for (j=0; j<dim[1]; j++) { 762 tempindx = i*dim[1] + j; 763 tempindx1 = i*NM + j; 764 765 indx1[tempindx]=local_0_start*dim[1]+tempindx; 766 indx2[tempindx]=low+tempindx1; 767 } 768 } 769 770 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 771 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 772 773 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 774 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 775 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 776 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 777 ierr = ISDestroy(&list1);CHKERRQ(ierr); 778 ierr = ISDestroy(&list2);CHKERRQ(ierr); 779 ierr = PetscFree(indx1);CHKERRQ(ierr); 780 ierr = PetscFree(indx2);CHKERRQ(ierr); 781 #endif 782 break; 783 784 case 3: 785 #if defined(PETSC_USE_COMPLEX) 786 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 787 788 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 789 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 790 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 791 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 793 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 794 ierr = ISDestroy(&list1);CHKERRQ(ierr); 795 ierr = ISDestroy(&list2);CHKERRQ(ierr); 796 #else 797 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 798 799 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 800 801 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 802 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 803 804 if (dim[2]%2==0) NM = dim[2]+2; 805 else NM = dim[2]+1; 806 807 for (i=0; i<local_n0; i++) { 808 for (j=0; j<dim[1]; j++) { 809 for (k=0; k<dim[2]; k++) { 810 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 811 tempindx1 = i*dim[1]*NM + j*NM + k; 812 813 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 814 indx2[tempindx]=low+tempindx1; 815 } 816 } 817 } 818 819 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 820 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 821 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 822 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 823 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 825 ierr = ISDestroy(&list1);CHKERRQ(ierr); 826 ierr = ISDestroy(&list2);CHKERRQ(ierr); 827 ierr = PetscFree(indx1);CHKERRQ(ierr); 828 ierr = PetscFree(indx2);CHKERRQ(ierr); 829 #endif 830 break; 831 832 default: 833 #if defined(PETSC_USE_COMPLEX) 834 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 835 836 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 837 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 838 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 839 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 840 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 841 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 842 ierr = ISDestroy(&list1);CHKERRQ(ierr); 843 ierr = ISDestroy(&list2);CHKERRQ(ierr); 844 #else 845 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 846 847 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 848 849 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 850 851 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 852 853 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 854 855 partial_dim = fftw->partial_dim; 856 857 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 858 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 859 860 if (dim[ndim-1]%2==0) NM = 2; 861 else NM = 1; 862 863 j = low; 864 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 865 indx1[i] = local_0_start*partial_dim + i; 866 indx2[i] = j; 867 if (k%dim[ndim-1]==0) j+=NM; 868 j++; 869 } 870 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 871 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 872 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 873 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 875 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 876 ierr = ISDestroy(&list1);CHKERRQ(ierr); 877 ierr = ISDestroy(&list2);CHKERRQ(ierr); 878 ierr = PetscFree(indx1);CHKERRQ(ierr); 879 ierr = PetscFree(indx2);CHKERRQ(ierr); 880 #endif 881 break; 882 } 883 } 884 PetscFunctionReturn(0); 885 } 886 887 #undef __FUNCT__ 888 #define __FUNCT__ "VecScatterFFTWToPetsc" 889 /*@ 890 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 891 892 Collective on Mat 893 894 Input Parameters: 895 + A - FFTW matrix 896 - x - FFTW vector 897 898 Output Parameters: 899 . y - PETSc vector 900 901 Level: intermediate 902 903 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 904 VecScatterFFTWToPetsc removes those extra zeros. 905 906 .seealso: VecScatterPetscToFFTW() 907 @*/ 908 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 909 { 910 PetscErrorCode ierr; 911 912 PetscFunctionBegin; 913 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 914 PetscFunctionReturn(0); 915 } 916 917 #undef __FUNCT__ 918 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 919 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 920 { 921 PetscErrorCode ierr; 922 MPI_Comm comm; 923 Mat_FFT *fft = (Mat_FFT*)A->data; 924 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 925 PetscInt N = fft->N; 926 PetscInt ndim = fft->ndim,*dim=fft->dim; 927 PetscInt low; 928 PetscMPIInt rank,size; 929 ptrdiff_t local_n0,local_0_start; 930 ptrdiff_t local_n1,local_1_start; 931 VecScatter vecscat; 932 IS list1,list2; 933 #if !defined(PETSC_USE_COMPLEX) 934 PetscInt i,j,k,partial_dim; 935 PetscInt *indx1, *indx2, tempindx, tempindx1; 936 PetscInt N1, n1,NM; 937 ptrdiff_t temp; 938 #endif 939 940 PetscFunctionBegin; 941 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 942 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 943 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 944 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 945 946 if (size==1) { 947 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 948 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 949 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 951 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 952 ierr = ISDestroy(&list1);CHKERRQ(ierr); 953 954 } else { 955 switch (ndim) { 956 case 1: 957 #if defined(PETSC_USE_COMPLEX) 958 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 959 960 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 961 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 962 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 963 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 964 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 966 ierr = ISDestroy(&list1);CHKERRQ(ierr); 967 ierr = ISDestroy(&list2);CHKERRQ(ierr); 968 #else 969 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 970 #endif 971 break; 972 case 2: 973 #if defined(PETSC_USE_COMPLEX) 974 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 975 976 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 977 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 978 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 979 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 980 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 982 ierr = ISDestroy(&list1);CHKERRQ(ierr); 983 ierr = ISDestroy(&list2);CHKERRQ(ierr); 984 #else 985 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 986 987 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 988 989 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 990 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 991 992 if (dim[1]%2==0) NM = dim[1]+2; 993 else NM = dim[1]+1; 994 995 for (i=0; i<local_n0; i++) { 996 for (j=0; j<dim[1]; j++) { 997 tempindx = i*dim[1] + j; 998 tempindx1 = i*NM + j; 999 1000 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1001 indx2[tempindx]=low+tempindx1; 1002 } 1003 } 1004 1005 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1006 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1007 1008 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1009 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1012 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1013 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1014 ierr = PetscFree(indx1);CHKERRQ(ierr); 1015 ierr = PetscFree(indx2);CHKERRQ(ierr); 1016 #endif 1017 break; 1018 case 3: 1019 #if defined(PETSC_USE_COMPLEX) 1020 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1021 1022 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1023 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1024 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1025 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1028 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1029 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1030 #else 1031 1032 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1033 1034 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1035 1036 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1037 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1038 1039 if (dim[2]%2==0) NM = dim[2]+2; 1040 else NM = dim[2]+1; 1041 1042 for (i=0; i<local_n0; i++) { 1043 for (j=0; j<dim[1]; j++) { 1044 for (k=0; k<dim[2]; k++) { 1045 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1046 tempindx1 = i*dim[1]*NM + j*NM + k; 1047 1048 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1049 indx2[tempindx]=low+tempindx1; 1050 } 1051 } 1052 } 1053 1054 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1055 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1056 1057 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1058 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1059 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1060 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1061 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1062 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1063 ierr = PetscFree(indx1);CHKERRQ(ierr); 1064 ierr = PetscFree(indx2);CHKERRQ(ierr); 1065 #endif 1066 break; 1067 default: 1068 #if defined(PETSC_USE_COMPLEX) 1069 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1070 1071 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1072 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1073 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1074 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1075 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1076 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1077 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1078 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1079 #else 1080 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1081 1082 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1083 1084 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1085 1086 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1087 1088 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1089 1090 partial_dim = fftw->partial_dim; 1091 1092 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1093 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1094 1095 if (dim[ndim-1]%2==0) NM = 2; 1096 else NM = 1; 1097 1098 j = low; 1099 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1100 indx1[i] = local_0_start*partial_dim + i; 1101 indx2[i] = j; 1102 if (k%dim[ndim-1]==0) j+=NM; 1103 j++; 1104 } 1105 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1107 1108 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1109 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1111 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1112 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1113 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1114 ierr = PetscFree(indx1);CHKERRQ(ierr); 1115 ierr = PetscFree(indx2);CHKERRQ(ierr); 1116 #endif 1117 break; 1118 } 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatCreate_FFTW" 1125 /* 1126 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1127 1128 Options Database Keys: 1129 + -mat_fftw_plannerflags - set FFTW planner flags 1130 1131 Level: intermediate 1132 1133 */ 1134 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1135 { 1136 PetscErrorCode ierr; 1137 MPI_Comm comm; 1138 Mat_FFT *fft=(Mat_FFT*)A->data; 1139 Mat_FFTW *fftw; 1140 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1141 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1142 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1143 PetscBool flg; 1144 PetscInt p_flag,partial_dim=1,ctr; 1145 PetscMPIInt size,rank; 1146 ptrdiff_t *pdim; 1147 ptrdiff_t local_n1,local_1_start; 1148 #if !defined(PETSC_USE_COMPLEX) 1149 ptrdiff_t temp; 1150 PetscInt N1,tot_dim; 1151 #else 1152 PetscInt n1; 1153 #endif 1154 1155 PetscFunctionBegin; 1156 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1157 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1158 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1159 1160 fftw_mpi_init(); 1161 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1162 pdim[0] = dim[0]; 1163 #if !defined(PETSC_USE_COMPLEX) 1164 tot_dim = 2*dim[0]; 1165 #endif 1166 for (ctr=1; ctr<ndim; ctr++) { 1167 partial_dim *= dim[ctr]; 1168 pdim[ctr] = dim[ctr]; 1169 #if !defined(PETSC_USE_COMPLEX) 1170 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1171 else tot_dim *= dim[ctr]; 1172 #endif 1173 } 1174 1175 if (size == 1) { 1176 #if defined(PETSC_USE_COMPLEX) 1177 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1178 n = N; 1179 #else 1180 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1181 n = tot_dim; 1182 #endif 1183 1184 } else { 1185 ptrdiff_t local_n0,local_0_start; 1186 switch (ndim) { 1187 case 1: 1188 #if !defined(PETSC_USE_COMPLEX) 1189 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1190 #else 1191 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1192 1193 n = (PetscInt)local_n0; 1194 n1 = (PetscInt)local_n1; 1195 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1196 #endif 1197 break; 1198 case 2: 1199 #if defined(PETSC_USE_COMPLEX) 1200 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1201 /* 1202 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1203 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1204 */ 1205 n = (PetscInt)local_n0*dim[1]; 1206 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1207 #else 1208 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1209 1210 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1211 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1212 #endif 1213 break; 1214 case 3: 1215 #if defined(PETSC_USE_COMPLEX) 1216 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1217 1218 n = (PetscInt)local_n0*dim[1]*dim[2]; 1219 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1220 #else 1221 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1222 1223 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1224 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1225 #endif 1226 break; 1227 default: 1228 #if defined(PETSC_USE_COMPLEX) 1229 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1230 1231 n = (PetscInt)local_n0*partial_dim; 1232 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1233 #else 1234 temp = pdim[ndim-1]; 1235 1236 pdim[ndim-1] = temp/2 + 1; 1237 1238 fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1239 1240 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1241 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1242 1243 pdim[ndim-1] = temp; 1244 1245 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1246 #endif 1247 break; 1248 } 1249 } 1250 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1251 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1252 fft->data = (void*)fftw; 1253 1254 fft->n = n; 1255 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1256 fftw->partial_dim = partial_dim; 1257 1258 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1259 if (size == 1) { 1260 #if defined(PETSC_USE_64BIT_INDICES) 1261 fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1262 #else 1263 fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1264 #endif 1265 } 1266 1267 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1268 1269 fftw->p_forward = 0; 1270 fftw->p_backward = 0; 1271 fftw->p_flag = FFTW_ESTIMATE; 1272 1273 if (size == 1) { 1274 A->ops->mult = MatMult_SeqFFTW; 1275 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1276 } else { 1277 A->ops->mult = MatMult_MPIFFTW; 1278 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1279 } 1280 fft->matdestroy = MatDestroy_FFTW; 1281 A->assembled = PETSC_TRUE; 1282 A->preallocated = PETSC_TRUE; 1283 1284 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1287 1288 /* get runtime options */ 1289 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1290 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1291 if (flg) { 1292 fftw->p_flag = iplans[p_flag]; 1293 } 1294 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 1299 1300 1301