1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 28 /* MatMult_SeqFFTW performs forward DFT in parallel 29 30 Input parameter: 31 mat - the matrix 32 x - the vector on which FDFT will be performed 33 34 Output parameter: 35 y - vector that stores result of FDFT 36 37 */ 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatMult_SeqFFTW" 40 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 41 { 42 PetscErrorCode ierr; 43 Mat_FFT *fft = (Mat_FFT*)A->data; 44 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 45 PetscScalar *x_array,*y_array; 46 PetscInt ndim=fft->ndim,*dim=fft->dim; 47 48 PetscFunctionBegin; 49 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 50 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 51 if (!fftw->p_forward){ /* create a plan, then excute it */ 52 switch (ndim){ 53 case 1: 54 #if defined(PETSC_USE_COMPLEX) 55 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 56 #else 57 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 58 #endif 59 break; 60 case 2: 61 #if defined(PETSC_USE_COMPLEX) 62 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 63 #else 64 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 65 #endif 66 break; 67 case 3: 68 #if defined(PETSC_USE_COMPLEX) 69 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 70 #else 71 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 72 #endif 73 break; 74 default: 75 #if defined(PETSC_USE_COMPLEX) 76 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 77 #else 78 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 79 #endif 80 break; 81 } 82 fftw->finarray = x_array; 83 fftw->foutarray = y_array; 84 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 85 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 86 fftw_execute(fftw->p_forward); 87 } else { /* use existing plan */ 88 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 89 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 90 } else { 91 fftw_execute(fftw->p_forward); 92 } 93 } 94 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 95 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 99 /* MatMultTranspose_SeqFFTW performs serial backward DFT 100 101 Input parameter: 102 mat - the matrix 103 x - the vector on which BDFT will be performed 104 105 Output parameter: 106 y - vector that stores result of BDFT 107 108 */ 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 112 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 113 { 114 PetscErrorCode ierr; 115 Mat_FFT *fft = (Mat_FFT*)A->data; 116 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 117 PetscScalar *x_array,*y_array; 118 PetscInt ndim=fft->ndim,*dim=fft->dim; 119 120 PetscFunctionBegin; 121 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 122 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 123 if (!fftw->p_backward){ /* create a plan, then excute it */ 124 switch (ndim){ 125 case 1: 126 #if defined(PETSC_USE_COMPLEX) 127 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 128 #else 129 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 130 #endif 131 break; 132 case 2: 133 #if defined(PETSC_USE_COMPLEX) 134 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 135 #else 136 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 137 #endif 138 break; 139 case 3: 140 #if defined(PETSC_USE_COMPLEX) 141 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 142 #else 143 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 144 #endif 145 break; 146 default: 147 #if defined(PETSC_USE_COMPLEX) 148 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 149 #else 150 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 151 #endif 152 break; 153 } 154 fftw->binarray = x_array; 155 fftw->boutarray = y_array; 156 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 157 } else { /* use existing plan */ 158 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 159 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 160 } else { 161 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 162 } 163 } 164 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 165 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 /* MatMult_MPIFFTW performs forward DFT in parallel 170 171 Input parameter: 172 mat - the matrix 173 x - the vector on which FDFT will be performed 174 175 Output parameter: 176 y - vector that stores result of FDFT 177 178 */ 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatMult_MPIFFTW" 182 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 183 { 184 PetscErrorCode ierr; 185 Mat_FFT *fft = (Mat_FFT*)A->data; 186 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 187 PetscScalar *x_array,*y_array; 188 PetscInt ndim=fft->ndim,*dim=fft->dim; 189 MPI_Comm comm=((PetscObject)A)->comm; 190 191 PetscFunctionBegin; 192 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 193 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 194 if (!fftw->p_forward){ /* create a plan, then excute it */ 195 switch (ndim){ 196 case 1: 197 #if defined(PETSC_USE_COMPLEX) 198 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 199 #else 200 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 201 #endif 202 break; 203 case 2: 204 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 205 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 206 #else 207 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 208 #endif 209 break; 210 case 3: 211 #if defined(PETSC_USE_COMPLEX) 212 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 213 #else 214 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 215 #endif 216 break; 217 default: 218 #if defined(PETSC_USE_COMPLEX) 219 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 220 #else 221 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 222 #endif 223 break; 224 } 225 fftw->finarray = x_array; 226 fftw->foutarray = y_array; 227 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 228 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 229 fftw_execute(fftw->p_forward); 230 } else { /* use existing plan */ 231 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 232 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 233 } else { 234 fftw_execute(fftw->p_forward); 235 } 236 } 237 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 238 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 243 244 Input parameter: 245 mat - the matrix 246 x - the vector on which BDFT will be performed 247 248 Output parameter: 249 y - vector that stores result of BDFT 250 251 */ 252 #undef __FUNCT__ 253 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 254 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 255 { 256 PetscErrorCode ierr; 257 Mat_FFT *fft = (Mat_FFT*)A->data; 258 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 259 PetscScalar *x_array,*y_array; 260 PetscInt ndim=fft->ndim,*dim=fft->dim; 261 MPI_Comm comm=((PetscObject)A)->comm; 262 263 PetscFunctionBegin; 264 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 265 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 266 if (!fftw->p_backward){ /* create a plan, then excute it */ 267 switch (ndim){ 268 case 1: 269 #if defined(PETSC_USE_COMPLEX) 270 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 271 #else 272 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 273 #endif 274 break; 275 case 2: 276 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 277 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 278 #else 279 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 280 #endif 281 break; 282 case 3: 283 #if defined(PETSC_USE_COMPLEX) 284 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 285 #else 286 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 287 #endif 288 break; 289 default: 290 #if defined(PETSC_USE_COMPLEX) 291 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 292 #else 293 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 294 #endif 295 break; 296 } 297 fftw->binarray = x_array; 298 fftw->boutarray = y_array; 299 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 300 } else { /* use existing plan */ 301 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 302 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 303 } else { 304 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 305 } 306 } 307 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 308 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatDestroy_FFTW" 314 PetscErrorCode MatDestroy_FFTW(Mat A) 315 { 316 Mat_FFT *fft = (Mat_FFT*)A->data; 317 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 fftw_destroy_plan(fftw->p_forward); 322 fftw_destroy_plan(fftw->p_backward); 323 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 324 ierr = PetscFree(fft->data);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 329 #undef __FUNCT__ 330 #define __FUNCT__ "VecDestroy_MPIFFTW" 331 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 332 { 333 PetscErrorCode ierr; 334 PetscScalar *array; 335 336 PetscFunctionBegin; 337 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 338 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 339 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 340 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 341 PetscFunctionReturn(0); 342 } 343 344 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatGetVecsFFTW" 347 /*@ 348 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 349 parallel layout determined by FFTW 350 351 Collective on Mat 352 353 Input Parameter: 354 . mat - the matrix 355 356 Output Parameter: 357 + fin - (optional) input vector of forward FFTW 358 - fout - (optional) output vector of forward FFTW 359 - bout - (optional) output vector of backward FFTW 360 361 Level: advanced 362 Note: The parallel layout of output of forward FFTW is always same as the input 363 of the backward FFTW. But parallel layout of the input vector of forward 364 FFTW might not be same as the output of backward FFTW. 365 Also note that we need to provide enough space while doing parallel real transform. 366 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 367 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 368 last dimension from n to n/2+1 while invoking this routine. 369 370 .seealso: MatCreateFFTW() 371 @*/ 372 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 373 { 374 PetscErrorCode ierr; 375 376 PetscFunctionBegin; 377 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 378 PetscFunctionReturn(0); 379 } 380 381 EXTERN_C_BEGIN 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 384 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 385 { 386 PetscErrorCode ierr; 387 PetscMPIInt size,rank; 388 MPI_Comm comm=((PetscObject)A)->comm; 389 Mat_FFT *fft = (Mat_FFT*)A->data; 390 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 391 PetscInt N=fft->N; 392 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 393 394 PetscFunctionBegin; 395 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 396 PetscValidType(A,1); 397 398 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 399 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 400 if (size == 1){ /* sequential case */ 401 #if defined(PETSC_USE_COMPLEX) 402 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 403 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 404 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 405 #else 406 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 407 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 408 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 409 #endif 410 } else { /* parallel cases */ 411 ptrdiff_t alloc_local,local_n0,local_0_start; 412 ptrdiff_t local_n1; 413 fftw_complex *data_fout; 414 ptrdiff_t local_1_start; 415 #if defined(PETSC_USE_COMPLEX) 416 fftw_complex *data_fin,*data_bout; 417 #else 418 double *data_finr,*data_boutr; 419 PetscInt n1,N1,vsize; 420 ptrdiff_t temp; 421 #endif 422 423 switch (ndim){ 424 case 1: 425 #if !defined(PETSC_USE_COMPLEX) 426 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 427 #else 428 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 429 if (fin) { 430 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 431 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 432 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 433 } 434 if (fout) { 435 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 436 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 437 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 438 } 439 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 440 if (bout) { 441 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 442 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 443 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 444 } 445 break; 446 #endif 447 case 2: 448 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 449 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 450 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 451 if (fin) { 452 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 453 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 454 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 455 } 456 if (bout) { 457 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 458 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 459 ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 460 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 461 } 462 //n1 = 2*local_n1*dim[0]; 463 if (fout) { 464 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 465 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 466 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 467 } 468 #else 469 /* Get local size */ 470 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 471 if (fin) { 472 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 473 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 474 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 475 } 476 if (fout) { 477 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 478 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 479 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 480 } 481 if (bout) { 482 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 483 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 484 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 485 } 486 #endif 487 break; 488 case 3: 489 #if !defined(PETSC_USE_COMPLEX) 490 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 491 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 492 if (fin) { 493 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 494 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 495 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 496 } 497 if (bout) { 498 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 499 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 500 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 501 } 502 //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 503 if (fout) { 504 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 505 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 506 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 507 } 508 #else 509 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 510 if (fin) { 511 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 512 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 513 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 514 } 515 if (fout) { 516 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 517 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 518 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 519 } 520 if (bout) { 521 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 522 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 523 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 524 } 525 #endif 526 break; 527 default: 528 #if !defined(PETSC_USE_COMPLEX) 529 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 530 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 531 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 532 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 533 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 534 535 if (fin) { 536 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 537 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 538 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 539 } 540 if (bout) { 541 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 542 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 543 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 544 } 545 //temp = fftw->partial_dim; 546 //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 547 //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 548 if (fout) { 549 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 550 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 551 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 #else 554 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 555 if (fin) { 556 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 557 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 558 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 559 } 560 if (fout) { 561 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 562 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 563 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 564 } 565 if (bout) { 566 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 567 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 568 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 569 } 570 #endif 571 break; 572 } 573 } 574 PetscFunctionReturn(0); 575 } 576 EXTERN_C_END 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatGetVecs_FFTW" 580 /* 581 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 582 parallel layout determined by FFTW 583 584 Collective on Mat 585 586 Input Parameter: 587 . mat - the matrix 588 589 Output Parameter: 590 + fin - (optional) input vector of forward FFTW 591 - fout - (optional) output vector of forward FFTW 592 593 Level: advanced 594 595 .seealso: MatCreateFFTW() 596 */ 597 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 598 { 599 PetscErrorCode ierr; 600 PetscMPIInt size,rank; 601 MPI_Comm comm=((PetscObject)A)->comm; 602 Mat_FFT *fft = (Mat_FFT*)A->data; 603 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 604 PetscInt N=fft->N; 605 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 606 607 PetscFunctionBegin; 608 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 609 PetscValidType(A,1); 610 611 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 612 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 613 if (size == 1){ /* sequential case */ 614 #if defined(PETSC_USE_COMPLEX) 615 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 616 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 617 #else 618 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 619 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 620 #endif 621 } else { /* mpi case */ 622 ptrdiff_t alloc_local,local_n0,local_0_start; 623 ptrdiff_t local_n1,local_1_end; 624 fftw_complex *data_fin,*data_fout; 625 #if !defined(PETSC_USE_COMPLEX) 626 double *data_finr ; 627 ptrdiff_t local_1_start,temp; 628 PetscInt vsize,n1,N1; 629 #endif 630 631 // PetscInt ctr; 632 // ptrdiff_t ndim1,*pdim; 633 // ndim1=(ptrdiff_t) ndim; 634 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 635 636 // for(ctr=0;ctr<ndim;ctr++) 637 // {k 638 // pdim[ctr] = dim[ctr]; 639 // } 640 641 642 643 switch (ndim){ 644 case 1: 645 /* Get local size */ 646 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 647 if (fin) { 648 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 649 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 650 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 651 } 652 if (fout) { 653 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 654 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 655 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 656 } 657 break; 658 case 2: 659 #if !defined(PETSC_USE_COMPLEX) 660 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 661 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 662 if (fin) { 663 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 664 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 665 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 666 //printf("The code comes here with vector size %d\n",vsize); 667 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 668 } 669 n1 = 2*local_n1*(dim[0]); 670 //n1 = 2*local_n1*dim[1]; 671 //printf("The values are %ld\n",local_n1); 672 if (fout) { 673 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 674 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 675 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 676 } 677 678 #else 679 /* Get local size */ 680 //printf("Hope this does not come here"); 681 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 682 if (fin) { 683 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 684 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 685 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 686 } 687 if (fout) { 688 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 689 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 690 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 691 } 692 //printf("Hope this does not come here"); 693 #endif 694 break; 695 case 3: 696 /* Get local size */ 697 #if !defined(PETSC_USE_COMPLEX) 698 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 699 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 700 if (fin) { 701 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 702 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 703 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 704 //printf("The code comes here with vector size %d\n",vsize); 705 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 706 } 707 n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 708 if (fout) { 709 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 710 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 711 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 712 } 713 714 715 #else 716 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 717 // printf("The quantity n is %d",n); 718 if (fin) { 719 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 720 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 721 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 722 } 723 if (fout) { 724 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 725 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 726 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 727 } 728 #endif 729 break; 730 default: 731 /* Get local size */ 732 #if !defined(PETSC_USE_COMPLEX) 733 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 734 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 735 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 736 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 737 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 738 if (fin) { 739 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 740 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 741 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 742 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 743 } 744 temp = fftw->partial_dim; 745 fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 746 747 n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 748 if (fout) { 749 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 750 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 751 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 752 } 753 754 #else 755 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 756 if (fin) { 757 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 758 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 759 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 760 } 761 if (fout) { 762 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 763 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 764 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 765 } 766 #endif 767 break; 768 } 769 } 770 // if (fin){ 771 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 772 // } 773 // if (fout){ 774 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 775 // } 776 PetscFunctionReturn(0); 777 } 778 779 #undef __FUNCT__ 780 #define __FUNCT__ "VecScatterPetscToFFTW" 781 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 782 { 783 PetscErrorCode ierr; 784 PetscFunctionBegin; 785 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 786 PetscFunctionReturn(0); 787 } 788 789 /* 790 VecScatterPetscToFFTW_FFTW - Copies the user data to the vector that goes into FFTW block. 791 Input A, x, y 792 A - FFTW matrix 793 x - user data 794 Options Database Keys: 795 + -mat_fftw_plannerflags - set FFTW planner flags 796 797 Level: intermediate 798 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 799 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 800 zeros. This routine does that job by scattering operation. 801 802 803 */ 804 805 EXTERN_C_BEGIN 806 #undef __FUNCT__ 807 #define __FUNCT__ "VecScatterPetscToFFTW_FTTW" 808 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 809 { 810 PetscErrorCode ierr; 811 MPI_Comm comm=((PetscObject)A)->comm; 812 Mat_FFT *fft = (Mat_FFT*)A->data; 813 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 814 PetscInt N=fft->N; 815 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 816 PetscInt low; 817 PetscInt rank,size,vsize,vsize1; 818 ptrdiff_t alloc_local,local_n0,local_0_start; 819 ptrdiff_t local_n1,local_1_start; 820 VecScatter vecscat; 821 IS list1,list2; 822 #if !defined(PETSC_USE_COMPLEX) 823 PetscInt i,j,k,partial_dim; 824 PetscInt *indx1, *indx2, tempindx, tempindx1; 825 PetscInt N1, n1 ,NM; 826 ptrdiff_t temp; 827 #endif 828 829 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 830 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 831 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 832 833 if (size==1) 834 { 835 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 836 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 837 //printf("The values of sizes are %d and %d",vsize,vsize1); 838 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 839 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 840 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 841 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 843 ierr = ISDestroy(&list1);CHKERRQ(ierr); 844 } 845 846 else{ 847 848 switch (ndim){ 849 case 1: 850 #if defined(PETSC_USE_COMPLEX) 851 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 852 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 853 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 854 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 855 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 856 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 857 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 858 ierr = ISDestroy(&list1);CHKERRQ(ierr); 859 ierr = ISDestroy(&list2);CHKERRQ(ierr); 860 #else 861 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 862 #endif 863 break; 864 case 2: 865 #if defined(PETSC_USE_COMPLEX) 866 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 867 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 868 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 869 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 870 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 873 ierr = ISDestroy(&list1);CHKERRQ(ierr); 874 ierr = ISDestroy(&list2);CHKERRQ(ierr); 875 #else 876 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 877 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 878 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 879 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 880 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 881 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 882 883 if (dim[1]%2==0) 884 NM = dim[1]+2; 885 else 886 NM = dim[1]+1; 887 888 for (i=0;i<local_n0;i++){ 889 for (j=0;j<dim[1];j++){ 890 tempindx = i*dim[1] + j; 891 tempindx1 = i*NM + j; 892 indx1[tempindx]=local_0_start*dim[1]+tempindx; 893 indx2[tempindx]=low+tempindx1; 894 } 895 } 896 897 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 898 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 899 900 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 901 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 902 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 904 ierr = ISDestroy(&list1);CHKERRQ(ierr); 905 ierr = ISDestroy(&list2);CHKERRQ(ierr); 906 ierr = PetscFree(indx1);CHKERRQ(ierr); 907 ierr = PetscFree(indx2);CHKERRQ(ierr); 908 #endif 909 break; 910 911 case 3: 912 #if defined(PETSC_USE_COMPLEX) 913 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 914 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 915 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 916 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 917 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 918 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 919 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 920 ierr = ISDestroy(&list1);CHKERRQ(ierr); 921 ierr = ISDestroy(&list2);CHKERRQ(ierr); 922 #else 923 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 924 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 925 926 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 927 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 928 929 if (dim[2]%2==0) 930 NM = dim[2]+2; 931 else 932 NM = dim[2]+1; 933 934 for (i=0;i<local_n0;i++){ 935 for (j=0;j<dim[1];j++){ 936 for (k=0;k<dim[2];k++){ 937 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 938 tempindx1 = i*dim[1]*NM + j*NM + k; 939 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 940 indx2[tempindx]=low+tempindx1; 941 } 942 } 943 } 944 945 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 946 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 947 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 948 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 949 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 951 ierr = ISDestroy(&list1);CHKERRQ(ierr); 952 ierr = ISDestroy(&list2);CHKERRQ(ierr); 953 ierr = PetscFree(indx1);CHKERRQ(ierr); 954 ierr = PetscFree(indx2);CHKERRQ(ierr); 955 #endif 956 break; 957 958 default: 959 #if defined(PETSC_USE_COMPLEX) 960 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 961 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 962 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 963 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 964 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 966 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 967 ierr = ISDestroy(&list1);CHKERRQ(ierr); 968 ierr = ISDestroy(&list2);CHKERRQ(ierr); 969 #else 970 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 971 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 972 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 973 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 974 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 975 976 partial_dim = fftw->partial_dim; 977 978 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 979 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 980 981 if (dim[ndim-1]%2==0) 982 NM = 2; 983 else 984 NM = 1; 985 986 j = low; 987 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 988 { 989 indx1[i] = local_0_start*partial_dim + i; 990 indx2[i] = j; 991 if (k%dim[ndim-1]==0) 992 { j+=NM;} 993 j++; 994 } 995 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 996 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 997 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 998 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1000 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1001 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1002 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1003 ierr = PetscFree(indx1);CHKERRQ(ierr); 1004 ierr = PetscFree(indx2);CHKERRQ(ierr); 1005 #endif 1006 break; 1007 } 1008 } 1009 return(0); 1010 } 1011 EXTERN_C_END 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "VecScatterFFTWToPetsc" 1015 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 1016 { 1017 PetscErrorCode ierr; 1018 PetscFunctionBegin; 1019 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 /* 1024 VecScatterFFTWToPetsc_FFTW - Converts FFTW output to the PETSc vector that user can use. 1025 Input A, x, y 1026 A - FFTW matrix 1027 x - FFTW vector 1028 y - PETSc vector that user can read 1029 1030 Level: intermediate 1031 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 1032 VecScatterFFTWToPetsc removes those extra zeros. 1033 1034 */ 1035 1036 EXTERN_C_BEGIN 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 1039 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 1040 { 1041 PetscErrorCode ierr; 1042 MPI_Comm comm=((PetscObject)A)->comm; 1043 Mat_FFT *fft = (Mat_FFT*)A->data; 1044 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 1045 PetscInt N=fft->N; 1046 PetscInt ndim=fft->ndim,*dim=fft->dim; 1047 PetscInt low; 1048 PetscInt rank,size; 1049 ptrdiff_t alloc_local,local_n0,local_0_start; 1050 ptrdiff_t local_n1,local_1_start; 1051 VecScatter vecscat; 1052 IS list1,list2; 1053 #if !defined(PETSC_USE_COMPLEX) 1054 PetscInt i,j,k,partial_dim; 1055 PetscInt *indx1, *indx2, tempindx, tempindx1; 1056 PetscInt N1, n1 ,NM; 1057 ptrdiff_t temp; 1058 #endif 1059 1060 1061 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1062 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1063 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 1064 1065 if (size==1){ 1066 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 1067 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1068 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1071 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1072 } 1073 else{ 1074 1075 switch (ndim){ 1076 case 1: 1077 #if defined(PETSC_USE_COMPLEX) 1078 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1079 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 1080 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 1081 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1082 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1083 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1084 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1085 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1086 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1087 #else 1088 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 1089 #endif 1090 break; 1091 case 2: 1092 #if defined(PETSC_USE_COMPLEX) 1093 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1094 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1095 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1096 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1097 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1098 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1099 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1100 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1101 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1102 #else 1103 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1104 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 1105 1106 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 1107 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 1108 1109 if (dim[1]%2==0) 1110 NM = dim[1]+2; 1111 else 1112 NM = dim[1]+1; 1113 1114 for (i=0;i<local_n0;i++){ 1115 for (j=0;j<dim[1];j++){ 1116 tempindx = i*dim[1] + j; 1117 tempindx1 = i*NM + j; 1118 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1119 indx2[tempindx]=low+tempindx1; 1120 } 1121 } 1122 1123 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1124 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1125 1126 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1127 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1128 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1129 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1130 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1131 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1132 ierr = PetscFree(indx1);CHKERRQ(ierr); 1133 ierr = PetscFree(indx2);CHKERRQ(ierr); 1134 #endif 1135 break; 1136 case 3: 1137 #if defined(PETSC_USE_COMPLEX) 1138 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1139 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1140 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1141 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1142 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1143 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1145 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1146 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1147 #else 1148 1149 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1150 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1151 1152 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1153 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1154 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1155 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1156 1157 if (dim[2]%2==0) 1158 NM = dim[2]+2; 1159 else 1160 NM = dim[2]+1; 1161 1162 for (i=0;i<local_n0;i++){ 1163 for (j=0;j<dim[1];j++){ 1164 for (k=0;k<dim[2];k++){ 1165 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1166 tempindx1 = i*dim[1]*NM + j*NM + k; 1167 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1168 indx2[tempindx]=low+tempindx1; 1169 } 1170 } 1171 } 1172 1173 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1174 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1175 1176 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1177 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1178 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1179 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1180 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1181 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1182 ierr = PetscFree(indx1);CHKERRQ(ierr); 1183 ierr = PetscFree(indx2);CHKERRQ(ierr); 1184 #endif 1185 break; 1186 default: 1187 #if defined(PETSC_USE_COMPLEX) 1188 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1189 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1190 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1191 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1192 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1193 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1194 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1195 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1196 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1197 #else 1198 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1199 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1200 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1201 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1202 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1203 1204 partial_dim = fftw->partial_dim; 1205 1206 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1207 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1208 1209 if (dim[ndim-1]%2==0) 1210 NM = 2; 1211 else 1212 NM = 1; 1213 1214 j = low; 1215 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1216 { 1217 indx1[i] = local_0_start*partial_dim + i; 1218 indx2[i] = j; 1219 if (k%dim[ndim-1]==0) 1220 { j+=NM;} 1221 j++; 1222 } 1223 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1224 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1225 1226 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1227 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1230 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1231 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1232 ierr = PetscFree(indx1);CHKERRQ(ierr); 1233 ierr = PetscFree(indx2);CHKERRQ(ierr); 1234 #endif 1235 break; 1236 } 1237 } 1238 return 0; 1239 } 1240 EXTERN_C_END 1241 1242 EXTERN_C_BEGIN 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatCreate_FFTW" 1245 /* 1246 MatCreate_FFTW - Creates a matrix object that provides FFT 1247 via the external package FFTW 1248 Options Database Keys: 1249 + -mat_fftw_plannerflags - set FFTW planner flags 1250 1251 Level: intermediate 1252 1253 */ 1254 1255 PetscErrorCode MatCreate_FFTW(Mat A) 1256 { 1257 PetscErrorCode ierr; 1258 MPI_Comm comm=((PetscObject)A)->comm; 1259 Mat_FFT *fft=(Mat_FFT*)A->data; 1260 Mat_FFTW *fftw; 1261 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1262 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1263 PetscBool flg; 1264 PetscInt p_flag,partial_dim=1,ctr; 1265 PetscMPIInt size,rank; 1266 ptrdiff_t *pdim; 1267 ptrdiff_t local_n1,local_1_start; 1268 #if !defined(PETSC_USE_COMPLEX) 1269 ptrdiff_t temp; 1270 PetscInt N1,tot_dim; 1271 #else 1272 PetscInt n1; 1273 #endif 1274 1275 PetscFunctionBegin; 1276 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1277 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1278 1279 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1280 pdim[0] = dim[0]; 1281 #if !defined(PETSC_USE_COMPLEX) 1282 tot_dim = 2*dim[0]; 1283 #endif 1284 for (ctr=1;ctr<ndim;ctr++) 1285 { 1286 partial_dim *= dim[ctr]; 1287 pdim[ctr] = dim[ctr]; 1288 #if !defined(PETSC_USE_COMPLEX) 1289 if (ctr==ndim-1) 1290 tot_dim *= (dim[ctr]/2+1); 1291 else 1292 tot_dim *= dim[ctr]; 1293 #endif 1294 } 1295 1296 if (size == 1) { 1297 #if defined(PETSC_USE_COMPLEX) 1298 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1299 n = N; 1300 #else 1301 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1302 n = tot_dim; 1303 #endif 1304 1305 } else { 1306 ptrdiff_t alloc_local,local_n0,local_0_start; 1307 switch (ndim){ 1308 case 1: 1309 #if !defined(PETSC_USE_COMPLEX) 1310 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1311 #else 1312 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1313 n = (PetscInt)local_n0; 1314 n1 = (PetscInt) local_n1; 1315 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1316 #endif 1317 break; 1318 case 2: 1319 #if defined(PETSC_USE_COMPLEX) 1320 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1321 /* 1322 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1323 PetscSynchronizedFlush(comm); 1324 */ 1325 n = (PetscInt)local_n0*dim[1]; 1326 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1327 #else 1328 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1329 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1330 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1331 #endif 1332 break; 1333 case 3: 1334 #if defined(PETSC_USE_COMPLEX) 1335 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1336 n = (PetscInt)local_n0*dim[1]*dim[2]; 1337 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1338 #else 1339 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1340 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1341 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1342 #endif 1343 break; 1344 default: 1345 #if defined(PETSC_USE_COMPLEX) 1346 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1347 n = (PetscInt)local_n0*partial_dim; 1348 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1349 #else 1350 temp = pdim[ndim-1]; 1351 pdim[ndim-1] = temp/2 + 1; 1352 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1353 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1354 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1355 pdim[ndim-1] = temp; 1356 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1357 #endif 1358 break; 1359 } 1360 } 1361 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1362 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1363 fft->data = (void*)fftw; 1364 1365 fft->n = n; 1366 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1367 fftw->partial_dim = partial_dim; 1368 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1369 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1370 1371 fftw->p_forward = 0; 1372 fftw->p_backward = 0; 1373 fftw->p_flag = FFTW_ESTIMATE; 1374 1375 if (size == 1){ 1376 A->ops->mult = MatMult_SeqFFTW; 1377 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1378 } else { 1379 A->ops->mult = MatMult_MPIFFTW; 1380 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1381 } 1382 fft->matdestroy = MatDestroy_FFTW; 1383 //A->ops->getvecs = MatGetVecs_FFTW; 1384 A->assembled = PETSC_TRUE; 1385 PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 1386 PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW); 1387 PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW); 1388 1389 /* get runtime options */ 1390 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1391 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1392 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1393 PetscOptionsEnd(); 1394 PetscFunctionReturn(0); 1395 } 1396 EXTERN_C_END 1397 1398 1399 1400 1401