1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 fftw_iodim *iodims; 15 PetscInt partial_dim; 16 fftw_plan p_forward,p_backward; 17 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 18 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 19 executed for the arrays with which the plan was created */ 20 } Mat_FFTW; 21 22 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 26 extern PetscErrorCode MatDestroy_FFTW(Mat); 27 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 28 29 /* MatMult_SeqFFTW performs forward DFT in parallel 30 Input parameter: 31 A - the matrix 32 x - the vector on which FDFT will be performed 33 34 Output parameter: 35 y - vector that stores result of FDFT 36 */ 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatMult_SeqFFTW" 39 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 40 { 41 PetscErrorCode ierr; 42 Mat_FFT *fft = (Mat_FFT*)A->data; 43 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 44 PetscScalar *x_array,*y_array; 45 PetscInt ndim = fft->ndim,*dim = fft->dim,i; 46 //int *dim_32,i; 47 fftw_iodim *iodims; 48 49 PetscFunctionBegin; 50 //ierr = PetscMalloc(ndim*sizeof(int),&dim_32);CHKERRQ(ierr); 51 //for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i]; 52 53 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 54 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 55 if (!fftw->p_forward) { /* create a plan, then excute it */ 56 switch (ndim) { 57 case 1: 58 #if defined(PETSC_USE_COMPLEX) 59 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 60 #else 61 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 62 #endif 63 break; 64 case 2: 65 #if defined(PETSC_USE_COMPLEX) 66 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 67 #else 68 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 69 #endif 70 break; 71 case 3: 72 #if defined(PETSC_USE_COMPLEX) 73 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 74 //fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,FFTW_MEASURE); 75 #else 76 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 77 #endif 78 break; 79 default: 80 #if defined(PETSC_USE_COMPLEX) 81 //fftw->p_forward = fftw_plan_dft((int)ndim,(int*)dim_32,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 82 /* bug: dim[3] changes from 10 to 0 '--with-64-bit-indices=1' */ 83 84 iodims = (fftw_iodim*)fftw->iodims; 85 //ierr = PetscMalloc(ndim*sizeof(fftw_iodim),&iodims);CHKERRQ(ierr); 86 if (ndim) { 87 iodims[ndim-1].n = dim[ndim-1]; 88 iodims[ndim-1].is = iodims[ndim-1].os = 1; 89 for (i=ndim-2; i>=0; --i) { 90 iodims[i].n = dim[i]; 91 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 92 } 93 } 94 95 fftw->p_forward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 96 97 #else 98 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 99 #endif 100 break; 101 } 102 fftw->finarray = x_array; 103 fftw->foutarray = y_array; 104 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 105 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 106 fftw_execute(fftw->p_forward); 107 } else { /* use existing plan */ 108 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 109 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 110 } else { 111 fftw_execute(fftw->p_forward); 112 } 113 } 114 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 115 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 116 //ierr = PetscFree(dim_32);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 /* MatMultTranspose_SeqFFTW performs serial backward DFT 121 Input parameter: 122 A - the matrix 123 x - the vector on which BDFT will be performed 124 125 Output parameter: 126 y - vector that stores result of BDFT 127 */ 128 129 #undef __FUNCT__ 130 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 131 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 132 { 133 PetscErrorCode ierr; 134 Mat_FFT *fft = (Mat_FFT*)A->data; 135 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 136 PetscScalar *x_array,*y_array; 137 PetscInt ndim=fft->ndim,*dim=fft->dim; 138 fftw_iodim *iodims=(fftw_iodim*)fftw->iodims; 139 //int *dim_32,i; /* for 64-bit */ 140 141 PetscFunctionBegin; 142 //ierr = PetscMalloc(ndim*sizeof(int),&dim_32); 143 //for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i]; 144 145 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 146 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 147 if (!fftw->p_backward) { /* create a plan, then excute it */ 148 switch (ndim) { 149 case 1: 150 #if defined(PETSC_USE_COMPLEX) 151 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 152 #else 153 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 154 #endif 155 break; 156 case 2: 157 #if defined(PETSC_USE_COMPLEX) 158 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 159 #else 160 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 161 #endif 162 break; 163 case 3: 164 #if defined(PETSC_USE_COMPLEX) 165 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 166 //fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,FFTW_MEASURE); 167 #else 168 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 169 #endif 170 break; 171 default: 172 #if defined(PETSC_USE_COMPLEX) 173 //fftw->p_backward = fftw_plan_dft((int)ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 174 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 175 #else 176 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 177 #endif 178 break; 179 } 180 fftw->binarray = x_array; 181 fftw->boutarray = y_array; 182 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 183 } else { /* use existing plan */ 184 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 185 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 186 } else { 187 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 188 } 189 } 190 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 191 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 192 //ierr = PetscFree(dim_32);CHKERRQ(ierr); 193 PetscFunctionReturn(0); 194 } 195 196 /* MatMult_MPIFFTW performs forward DFT in parallel 197 Input parameter: 198 A - the matrix 199 x - the vector on which FDFT will be performed 200 201 Output parameter: 202 y - vector that stores result of FDFT 203 */ 204 #undef __FUNCT__ 205 #define __FUNCT__ "MatMult_MPIFFTW" 206 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 207 { 208 PetscErrorCode ierr; 209 Mat_FFT *fft = (Mat_FFT*)A->data; 210 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 211 PetscScalar *x_array,*y_array; 212 PetscInt ndim=fft->ndim,*dim=fft->dim; 213 MPI_Comm comm; 214 215 PetscFunctionBegin; 216 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 217 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 218 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 219 if (!fftw->p_forward) { /* create a plan, then excute it */ 220 switch (ndim) { 221 case 1: 222 #if defined(PETSC_USE_COMPLEX) 223 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 224 #else 225 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 226 #endif 227 break; 228 case 2: 229 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 230 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 231 #else 232 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 233 #endif 234 break; 235 case 3: 236 #if defined(PETSC_USE_COMPLEX) 237 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 238 #else 239 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 240 #endif 241 break; 242 default: 243 #if defined(PETSC_USE_COMPLEX) 244 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 245 #else 246 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 247 #endif 248 break; 249 } 250 fftw->finarray = x_array; 251 fftw->foutarray = y_array; 252 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 253 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 254 fftw_execute(fftw->p_forward); 255 } else { /* use existing plan */ 256 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 257 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 258 } else { 259 fftw_execute(fftw->p_forward); 260 } 261 } 262 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 263 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 264 PetscFunctionReturn(0); 265 } 266 267 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 268 Input parameter: 269 A - the matrix 270 x - the vector on which BDFT will be performed 271 272 Output parameter: 273 y - vector that stores result of BDFT 274 */ 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 277 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 278 { 279 PetscErrorCode ierr; 280 Mat_FFT *fft = (Mat_FFT*)A->data; 281 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 282 PetscScalar *x_array,*y_array; 283 PetscInt ndim=fft->ndim,*dim=fft->dim; 284 MPI_Comm comm; 285 286 PetscFunctionBegin; 287 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 288 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 289 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 290 if (!fftw->p_backward) { /* create a plan, then excute it */ 291 switch (ndim) { 292 case 1: 293 #if defined(PETSC_USE_COMPLEX) 294 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 295 #else 296 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 297 #endif 298 break; 299 case 2: 300 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 301 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 302 #else 303 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 304 #endif 305 break; 306 case 3: 307 #if defined(PETSC_USE_COMPLEX) 308 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 309 #else 310 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 311 #endif 312 break; 313 default: 314 #if defined(PETSC_USE_COMPLEX) 315 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 316 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 317 #else 318 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 319 #endif 320 break; 321 } 322 fftw->binarray = x_array; 323 fftw->boutarray = y_array; 324 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 325 } else { /* use existing plan */ 326 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 327 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 328 } else { 329 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 330 } 331 } 332 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 333 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatDestroy_FFTW" 339 PetscErrorCode MatDestroy_FFTW(Mat A) 340 { 341 Mat_FFT *fft = (Mat_FFT*)A->data; 342 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 343 PetscErrorCode ierr; 344 345 PetscFunctionBegin; 346 fftw_destroy_plan(fftw->p_forward); 347 fftw_destroy_plan(fftw->p_backward); 348 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 349 if (fftw->iodims) { 350 /* ierr = PetscFree(fftw->iodims);CHKERRQ(ierr); */ 351 free(fftw->iodims); 352 } 353 ierr = PetscFree(fft->data);CHKERRQ(ierr); 354 fftw_mpi_cleanup(); 355 PetscFunctionReturn(0); 356 } 357 358 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 359 #undef __FUNCT__ 360 #define __FUNCT__ "VecDestroy_MPIFFTW" 361 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 362 { 363 PetscErrorCode ierr; 364 PetscScalar *array; 365 366 PetscFunctionBegin; 367 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 368 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 369 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 370 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 371 PetscFunctionReturn(0); 372 } 373 374 #undef __FUNCT__ 375 #define __FUNCT__ "MatGetVecsFFTW" 376 /*@ 377 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 378 parallel layout determined by FFTW 379 380 Collective on Mat 381 382 Input Parameter: 383 . A - the matrix 384 385 Output Parameter: 386 + x - (optional) input vector of forward FFTW 387 - y - (optional) output vector of forward FFTW 388 - z - (optional) output vector of backward FFTW 389 390 Level: advanced 391 392 Note: The parallel layout of output of forward FFTW is always same as the input 393 of the backward FFTW. But parallel layout of the input vector of forward 394 FFTW might not be same as the output of backward FFTW. 395 Also note that we need to provide enough space while doing parallel real transform. 396 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 397 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 398 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 399 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 400 zeros if it is odd only one zero is needed. 401 Lastly one needs some scratch space at the end of data set in each process. alloc_local 402 figures out how much space is needed, i.e. it figures out the data+scratch space for 403 each processor and returns that. 404 405 .seealso: MatCreateFFTW() 406 @*/ 407 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 408 { 409 PetscErrorCode ierr; 410 411 PetscFunctionBegin; 412 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 418 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 419 { 420 PetscErrorCode ierr; 421 PetscMPIInt size,rank; 422 MPI_Comm comm; 423 Mat_FFT *fft = (Mat_FFT*)A->data; 424 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 425 PetscInt N = fft->N; 426 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 427 428 PetscFunctionBegin; 429 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 430 PetscValidType(A,1); 431 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 432 433 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 434 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 435 if (size == 1) { /* sequential case */ 436 #if defined(PETSC_USE_COMPLEX) 437 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 438 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 439 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 440 #else 441 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 442 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 443 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 444 #endif 445 } else { /* parallel cases */ 446 ptrdiff_t alloc_local,local_n0,local_0_start; 447 ptrdiff_t local_n1; 448 fftw_complex *data_fout; 449 ptrdiff_t local_1_start; 450 #if defined(PETSC_USE_COMPLEX) 451 fftw_complex *data_fin,*data_bout; 452 #else 453 double *data_finr,*data_boutr; 454 PetscInt n1,N1; 455 ptrdiff_t temp; 456 #endif 457 458 switch (ndim) { 459 case 1: 460 #if !defined(PETSC_USE_COMPLEX) 461 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 462 #else 463 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 464 if (fin) { 465 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 466 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 467 468 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 469 } 470 if (fout) { 471 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 472 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 473 474 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 475 } 476 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 477 if (bout) { 478 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 479 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 480 481 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 482 } 483 break; 484 #endif 485 case 2: 486 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 487 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 488 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 489 if (fin) { 490 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 491 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 492 493 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 494 } 495 if (bout) { 496 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 497 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 498 499 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 500 } 501 if (fout) { 502 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 503 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 504 505 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 506 } 507 #else 508 /* Get local size */ 509 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 510 if (fin) { 511 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 512 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 513 514 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 515 } 516 if (fout) { 517 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 518 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 519 520 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 521 } 522 if (bout) { 523 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 524 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 525 526 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 527 } 528 #endif 529 break; 530 case 3: 531 #if !defined(PETSC_USE_COMPLEX) 532 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 533 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 534 if (fin) { 535 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 536 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 537 538 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 539 } 540 if (bout) { 541 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 542 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 543 544 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 545 } 546 547 if (fout) { 548 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 549 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 550 551 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 #else 554 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 555 if (fin) { 556 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 557 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 558 559 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 560 } 561 if (fout) { 562 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 563 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 564 565 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 566 } 567 if (bout) { 568 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 569 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 570 571 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 572 } 573 #endif 574 break; 575 default: 576 #if !defined(PETSC_USE_COMPLEX) 577 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 578 579 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 580 581 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 582 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 583 584 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 585 586 if (fin) { 587 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 588 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 589 590 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 591 } 592 if (bout) { 593 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 594 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 595 596 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 597 } 598 599 if (fout) { 600 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 601 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 602 603 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 604 } 605 #else 606 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 607 if (fin) { 608 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 609 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 610 611 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 612 } 613 if (fout) { 614 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 615 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 616 617 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 618 } 619 if (bout) { 620 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 621 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 622 623 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 624 } 625 #endif 626 break; 627 } 628 } 629 PetscFunctionReturn(0); 630 } 631 632 #undef __FUNCT__ 633 #define __FUNCT__ "VecScatterPetscToFFTW" 634 /*@ 635 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 636 637 Collective on Mat 638 639 Input Parameters: 640 + A - FFTW matrix 641 - x - the PETSc vector 642 643 Output Parameters: 644 . y - the FFTW vector 645 646 Options Database Keys: 647 . -mat_fftw_plannerflags - set FFTW planner flags 648 649 Level: intermediate 650 651 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 652 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 653 zeros. This routine does that job by scattering operation. 654 655 .seealso: VecScatterFFTWToPetsc() 656 @*/ 657 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 658 { 659 PetscErrorCode ierr; 660 661 PetscFunctionBegin; 662 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 668 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 669 { 670 PetscErrorCode ierr; 671 MPI_Comm comm; 672 Mat_FFT *fft = (Mat_FFT*)A->data; 673 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 674 PetscInt N =fft->N; 675 PetscInt ndim =fft->ndim,*dim=fft->dim; 676 PetscInt low; 677 PetscMPIInt rank,size; 678 PetscInt vsize,vsize1; 679 //ptrdiff_t alloc_local,local_n0,local_0_start; 680 ptrdiff_t local_n0,local_0_start; 681 ptrdiff_t local_n1,local_1_start; 682 VecScatter vecscat; 683 IS list1,list2; 684 #if !defined(PETSC_USE_COMPLEX) 685 PetscInt i,j,k,partial_dim; 686 PetscInt *indx1, *indx2, tempindx, tempindx1; 687 PetscInt N1, n1,NM; 688 ptrdiff_t temp; 689 #endif 690 691 PetscFunctionBegin; 692 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 693 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 694 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 695 ierr = VecGetOwnershipRange(y,&low,NULL); 696 697 if (size==1) { 698 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 699 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 700 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 701 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 702 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 703 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 704 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 705 ierr = ISDestroy(&list1);CHKERRQ(ierr); 706 } else { 707 switch (ndim) { 708 case 1: 709 #if defined(PETSC_USE_COMPLEX) 710 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 711 712 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 713 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 714 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 715 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 716 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 717 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 718 ierr = ISDestroy(&list1);CHKERRQ(ierr); 719 ierr = ISDestroy(&list2);CHKERRQ(ierr); 720 #else 721 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 722 #endif 723 break; 724 case 2: 725 #if defined(PETSC_USE_COMPLEX) 726 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 727 728 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 729 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 730 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 731 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 732 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 733 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 734 ierr = ISDestroy(&list1);CHKERRQ(ierr); 735 ierr = ISDestroy(&list2);CHKERRQ(ierr); 736 #else 737 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 738 739 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 740 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 741 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 742 743 if (dim[1]%2==0) { 744 NM = dim[1]+2; 745 } else { 746 NM = dim[1]+1; 747 } 748 for (i=0; i<local_n0; i++) { 749 for (j=0; j<dim[1]; j++) { 750 tempindx = i*dim[1] + j; 751 tempindx1 = i*NM + j; 752 753 indx1[tempindx]=local_0_start*dim[1]+tempindx; 754 indx2[tempindx]=low+tempindx1; 755 } 756 } 757 758 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 759 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 760 761 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 762 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 763 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 764 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 765 ierr = ISDestroy(&list1);CHKERRQ(ierr); 766 ierr = ISDestroy(&list2);CHKERRQ(ierr); 767 ierr = PetscFree(indx1);CHKERRQ(ierr); 768 ierr = PetscFree(indx2);CHKERRQ(ierr); 769 #endif 770 break; 771 772 case 3: 773 #if defined(PETSC_USE_COMPLEX) 774 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 775 776 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 777 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 778 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 779 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 780 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 781 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 782 ierr = ISDestroy(&list1);CHKERRQ(ierr); 783 ierr = ISDestroy(&list2);CHKERRQ(ierr); 784 #else 785 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 786 787 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 788 789 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 790 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 791 792 if (dim[2]%2==0) NM = dim[2]+2; 793 else NM = dim[2]+1; 794 795 for (i=0; i<local_n0; i++) { 796 for (j=0; j<dim[1]; j++) { 797 for (k=0; k<dim[2]; k++) { 798 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 799 tempindx1 = i*dim[1]*NM + j*NM + k; 800 801 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 802 indx2[tempindx]=low+tempindx1; 803 } 804 } 805 } 806 807 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 808 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 809 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 810 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 811 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 812 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 813 ierr = ISDestroy(&list1);CHKERRQ(ierr); 814 ierr = ISDestroy(&list2);CHKERRQ(ierr); 815 ierr = PetscFree(indx1);CHKERRQ(ierr); 816 ierr = PetscFree(indx2);CHKERRQ(ierr); 817 #endif 818 break; 819 820 default: 821 #if defined(PETSC_USE_COMPLEX) 822 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 823 824 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 825 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 826 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 827 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 829 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 830 ierr = ISDestroy(&list1);CHKERRQ(ierr); 831 ierr = ISDestroy(&list2);CHKERRQ(ierr); 832 #else 833 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 834 835 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 836 837 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 838 839 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 840 841 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 842 843 partial_dim = fftw->partial_dim; 844 845 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 846 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 847 848 if (dim[ndim-1]%2==0) NM = 2; 849 else NM = 1; 850 851 j = low; 852 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 853 indx1[i] = local_0_start*partial_dim + i; 854 indx2[i] = j; 855 if (k%dim[ndim-1]==0) j+=NM; 856 j++; 857 } 858 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 859 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 860 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 861 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 864 ierr = ISDestroy(&list1);CHKERRQ(ierr); 865 ierr = ISDestroy(&list2);CHKERRQ(ierr); 866 ierr = PetscFree(indx1);CHKERRQ(ierr); 867 ierr = PetscFree(indx2);CHKERRQ(ierr); 868 #endif 869 break; 870 } 871 } 872 PetscFunctionReturn(0); 873 } 874 875 #undef __FUNCT__ 876 #define __FUNCT__ "VecScatterFFTWToPetsc" 877 /*@ 878 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 879 880 Collective on Mat 881 882 Input Parameters: 883 + A - FFTW matrix 884 - x - FFTW vector 885 886 Output Parameters: 887 . y - PETSc vector 888 889 Level: intermediate 890 891 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 892 VecScatterFFTWToPetsc removes those extra zeros. 893 894 .seealso: VecScatterPetscToFFTW() 895 @*/ 896 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 897 { 898 PetscErrorCode ierr; 899 900 PetscFunctionBegin; 901 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 905 #undef __FUNCT__ 906 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 907 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 908 { 909 PetscErrorCode ierr; 910 MPI_Comm comm; 911 Mat_FFT *fft = (Mat_FFT*)A->data; 912 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 913 PetscInt N = fft->N; 914 PetscInt ndim = fft->ndim,*dim=fft->dim; 915 PetscInt low; 916 PetscMPIInt rank,size; 917 ptrdiff_t local_n0,local_0_start; 918 ptrdiff_t local_n1,local_1_start; 919 VecScatter vecscat; 920 IS list1,list2; 921 #if !defined(PETSC_USE_COMPLEX) 922 PetscInt i,j,k,partial_dim; 923 PetscInt *indx1, *indx2, tempindx, tempindx1; 924 PetscInt N1, n1,NM; 925 ptrdiff_t temp; 926 #endif 927 928 PetscFunctionBegin; 929 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 930 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 931 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 932 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 933 934 if (size==1) { 935 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 936 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 937 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 938 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 939 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 940 ierr = ISDestroy(&list1);CHKERRQ(ierr); 941 942 } else { 943 switch (ndim) { 944 case 1: 945 #if defined(PETSC_USE_COMPLEX) 946 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 947 948 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 949 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 950 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 951 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 952 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 953 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 954 ierr = ISDestroy(&list1);CHKERRQ(ierr); 955 ierr = ISDestroy(&list2);CHKERRQ(ierr); 956 #else 957 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 958 #endif 959 break; 960 case 2: 961 #if defined(PETSC_USE_COMPLEX) 962 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 963 964 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 965 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 966 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 967 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 968 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 969 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 970 ierr = ISDestroy(&list1);CHKERRQ(ierr); 971 ierr = ISDestroy(&list2);CHKERRQ(ierr); 972 #else 973 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 974 975 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 976 977 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 978 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 979 980 if (dim[1]%2==0) NM = dim[1]+2; 981 else NM = dim[1]+1; 982 983 for (i=0; i<local_n0; i++) { 984 for (j=0; j<dim[1]; j++) { 985 tempindx = i*dim[1] + j; 986 tempindx1 = i*NM + j; 987 988 indx1[tempindx]=local_0_start*dim[1]+tempindx; 989 indx2[tempindx]=low+tempindx1; 990 } 991 } 992 993 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 994 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 995 996 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 997 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1000 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1001 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1002 ierr = PetscFree(indx1);CHKERRQ(ierr); 1003 ierr = PetscFree(indx2);CHKERRQ(ierr); 1004 #endif 1005 break; 1006 case 3: 1007 #if defined(PETSC_USE_COMPLEX) 1008 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1009 1010 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1011 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1012 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1013 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1015 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1016 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1017 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1018 #else 1019 1020 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1021 1022 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1023 1024 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1025 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1026 1027 if (dim[2]%2==0) NM = dim[2]+2; 1028 else NM = dim[2]+1; 1029 1030 for (i=0; i<local_n0; i++) { 1031 for (j=0; j<dim[1]; j++) { 1032 for (k=0; k<dim[2]; k++) { 1033 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1034 tempindx1 = i*dim[1]*NM + j*NM + k; 1035 1036 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1037 indx2[tempindx]=low+tempindx1; 1038 } 1039 } 1040 } 1041 1042 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1043 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1044 1045 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1046 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1047 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1048 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1049 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1050 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1051 ierr = PetscFree(indx1);CHKERRQ(ierr); 1052 ierr = PetscFree(indx2);CHKERRQ(ierr); 1053 #endif 1054 break; 1055 default: 1056 #if defined(PETSC_USE_COMPLEX) 1057 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1058 1059 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1060 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1061 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1062 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1063 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1064 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1065 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1066 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1067 #else 1068 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1069 1070 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1071 1072 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1073 1074 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1075 1076 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1077 1078 partial_dim = fftw->partial_dim; 1079 1080 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1081 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1082 1083 if (dim[ndim-1]%2==0) NM = 2; 1084 else NM = 1; 1085 1086 j = low; 1087 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1088 indx1[i] = local_0_start*partial_dim + i; 1089 indx2[i] = j; 1090 if (k%dim[ndim-1]==0) j+=NM; 1091 j++; 1092 } 1093 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1094 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1095 1096 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1097 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1098 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1099 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1100 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1101 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1102 ierr = PetscFree(indx1);CHKERRQ(ierr); 1103 ierr = PetscFree(indx2);CHKERRQ(ierr); 1104 #endif 1105 break; 1106 } 1107 } 1108 PetscFunctionReturn(0); 1109 } 1110 1111 #undef __FUNCT__ 1112 #define __FUNCT__ "MatCreate_FFTW" 1113 /* 1114 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1115 1116 Options Database Keys: 1117 + -mat_fftw_plannerflags - set FFTW planner flags 1118 1119 Level: intermediate 1120 1121 */ 1122 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1123 { 1124 PetscErrorCode ierr; 1125 MPI_Comm comm; 1126 Mat_FFT *fft=(Mat_FFT*)A->data; 1127 Mat_FFTW *fftw; 1128 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1129 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1130 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1131 PetscBool flg; 1132 PetscInt p_flag,partial_dim=1,ctr; 1133 PetscMPIInt size,rank; 1134 ptrdiff_t *pdim; 1135 ptrdiff_t local_n1,local_1_start; 1136 #if !defined(PETSC_USE_COMPLEX) 1137 ptrdiff_t temp; 1138 PetscInt N1,tot_dim; 1139 #else 1140 PetscInt n1; 1141 #endif 1142 1143 PetscFunctionBegin; 1144 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1145 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1146 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1147 1148 fftw_mpi_init(); 1149 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1150 pdim[0] = dim[0]; 1151 #if !defined(PETSC_USE_COMPLEX) 1152 tot_dim = 2*dim[0]; 1153 #endif 1154 for (ctr=1; ctr<ndim; ctr++) { 1155 partial_dim *= dim[ctr]; 1156 pdim[ctr] = dim[ctr]; 1157 #if !defined(PETSC_USE_COMPLEX) 1158 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1159 else tot_dim *= dim[ctr]; 1160 #endif 1161 } 1162 1163 if (size == 1) { 1164 #if defined(PETSC_USE_COMPLEX) 1165 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1166 n = N; 1167 #else 1168 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1169 n = tot_dim; 1170 #endif 1171 1172 } else { 1173 ptrdiff_t local_n0,local_0_start; 1174 switch (ndim) { 1175 case 1: 1176 #if !defined(PETSC_USE_COMPLEX) 1177 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1178 #else 1179 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1180 1181 n = (PetscInt)local_n0; 1182 n1 = (PetscInt)local_n1; 1183 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1184 #endif 1185 break; 1186 case 2: 1187 #if defined(PETSC_USE_COMPLEX) 1188 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1189 /* 1190 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1191 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1192 */ 1193 n = (PetscInt)local_n0*dim[1]; 1194 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1195 #else 1196 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1197 1198 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1199 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1200 #endif 1201 break; 1202 case 3: 1203 #if defined(PETSC_USE_COMPLEX) 1204 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1205 1206 n = (PetscInt)local_n0*dim[1]*dim[2]; 1207 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1208 #else 1209 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1210 1211 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1212 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1213 #endif 1214 break; 1215 default: 1216 #if defined(PETSC_USE_COMPLEX) 1217 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1218 1219 n = (PetscInt)local_n0*partial_dim; 1220 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1221 #else 1222 temp = pdim[ndim-1]; 1223 1224 pdim[ndim-1] = temp/2 + 1; 1225 1226 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1227 1228 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1229 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1230 1231 pdim[ndim-1] = temp; 1232 1233 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1234 #endif 1235 break; 1236 } 1237 } 1238 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1239 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1240 fft->data = (void*)fftw; 1241 1242 fft->n = n; 1243 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1244 fftw->partial_dim = partial_dim; 1245 1246 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1247 if (size == 1) { 1248 /* ierr = PetscMalloc(ndim, &(fftw->iodims));CHKERRQ(ierr); error! */ 1249 /* ierr = PetscMalloc(ndim*sizeof(fftw_iodim),&(fftw->iodims));CHKERRQ(ierr); -not sure if this is ok */ 1250 fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1251 } 1252 1253 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1254 1255 fftw->p_forward = 0; 1256 fftw->p_backward = 0; 1257 fftw->p_flag = FFTW_ESTIMATE; 1258 1259 if (size == 1) { 1260 A->ops->mult = MatMult_SeqFFTW; 1261 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1262 } else { 1263 A->ops->mult = MatMult_MPIFFTW; 1264 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1265 } 1266 fft->matdestroy = MatDestroy_FFTW; 1267 A->assembled = PETSC_TRUE; 1268 A->preallocated = PETSC_TRUE; 1269 1270 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1273 1274 /* get runtime options */ 1275 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1276 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1277 if (flg) { 1278 fftw->p_flag = iplans[p_flag]; 1279 } 1280 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1281 PetscFunctionReturn(0); 1282 } 1283 1284 1285 1286 1287