1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 fftw_plan p_forward,p_backward; 15 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 16 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 17 executed for the arrays with which the plan was created */ 18 } Mat_FFTW; 19 20 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 21 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatDestroy_FFTW(Mat); 25 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 26 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 27 extern PetscErrorCode InputTransformFFT_FFTW(Mat,Vec,Vec); 28 extern PetscErrorCode InputTransformFFT(Mat,Vec,Vec); 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMult_SeqFFTW" 32 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 33 { 34 PetscErrorCode ierr; 35 Mat_FFT *fft = (Mat_FFT*)A->data; 36 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 37 PetscScalar *x_array,*y_array; 38 PetscInt ndim=fft->ndim,*dim=fft->dim; 39 40 PetscFunctionBegin; 41 #if !defined(PETSC_USE_COMPLEX) 42 43 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 44 #endif 45 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 46 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 47 if (!fftw->p_forward){ /* create a plan, then excute it */ 48 switch (ndim){ 49 case 1: 50 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 51 break; 52 case 2: 53 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54 break; 55 case 3: 56 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 57 break; 58 default: 59 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 60 break; 61 } 62 fftw->finarray = x_array; 63 fftw->foutarray = y_array; 64 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 65 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 66 fftw_execute(fftw->p_forward); 67 } else { /* use existing plan */ 68 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 69 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 70 } else { 71 fftw_execute(fftw->p_forward); 72 } 73 } 74 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 75 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 79 #undef __FUNCT__ 80 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 81 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 82 { 83 PetscErrorCode ierr; 84 Mat_FFT *fft = (Mat_FFT*)A->data; 85 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 86 PetscScalar *x_array,*y_array; 87 PetscInt ndim=fft->ndim,*dim=fft->dim; 88 89 PetscFunctionBegin; 90 #if !defined(PETSC_USE_COMPLEX) 91 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 92 #endif 93 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 94 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 95 if (!fftw->p_backward){ /* create a plan, then excute it */ 96 switch (ndim){ 97 case 1: 98 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 99 break; 100 case 2: 101 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 102 break; 103 case 3: 104 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 105 break; 106 default: 107 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 108 break; 109 } 110 fftw->binarray = x_array; 111 fftw->boutarray = y_array; 112 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 113 } else { /* use existing plan */ 114 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 115 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 116 } else { 117 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 118 } 119 } 120 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 121 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 122 PetscFunctionReturn(0); 123 } 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatMult_MPIFFTW" 127 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 128 { 129 PetscErrorCode ierr; 130 Mat_FFT *fft = (Mat_FFT*)A->data; 131 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 132 PetscScalar *x_array,*y_array; 133 PetscInt ndim=fft->ndim,*dim=fft->dim; 134 MPI_Comm comm=((PetscObject)A)->comm; 135 // PetscInt ctr; 136 // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 137 // ndim1=(ptrdiff_t) ndim; 138 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 139 140 // for(ctr=0;ctr<ndim;ctr++) 141 // { 142 // pdim[ctr] = dim[ctr]; 143 // } 144 145 PetscFunctionBegin; 146 //#if !defined(PETSC_USE_COMPLEX) 147 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 148 //#endif 149 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 150 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 151 152 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 153 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 154 if (!fftw->p_forward){ /* create a plan, then excute it */ 155 switch (ndim){ 156 case 1: 157 #if defined(PETSC_USE_COMPLEX) 158 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 159 #endif 160 break; 161 case 2: 162 #if defined(PETSC_USE_COMPLEX) 163 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 164 #else 165 printf("The code comes here \n"); 166 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 167 #endif 168 break; 169 case 3: 170 #if defined(PETSC_USE_COMPLEX) 171 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 172 #else 173 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 174 #endif 175 break; 176 default: 177 #if defined(PETSC_USE_COMPLEX) 178 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 179 #else 180 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 181 #endif 182 // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 183 break; 184 } 185 fftw->finarray = x_array; 186 fftw->foutarray = y_array; 187 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 188 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 189 fftw_execute(fftw->p_forward); 190 } else { /* use existing plan */ 191 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 192 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 193 } else { 194 fftw_execute(fftw->p_forward); 195 } 196 } 197 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 198 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 199 PetscFunctionReturn(0); 200 } 201 202 #undef __FUNCT__ 203 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 204 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 205 { 206 PetscErrorCode ierr; 207 Mat_FFT *fft = (Mat_FFT*)A->data; 208 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 209 PetscScalar *x_array,*y_array; 210 PetscInt ndim=fft->ndim,*dim=fft->dim; 211 MPI_Comm comm=((PetscObject)A)->comm; 212 // PetscInt ctr; 213 // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 214 // ndim1=(ptrdiff_t) ndim; 215 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 216 217 // for(ctr=0;ctr<ndim;ctr++) 218 // { 219 // pdim[ctr] = dim[ctr]; 220 // } 221 222 PetscFunctionBegin; 223 //#if !defined(PETSC_USE_COMPLEX) 224 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 225 //#endif 226 // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 227 // should pdim be a member of Mat_FFTW? 228 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 229 230 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 231 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 232 if (!fftw->p_backward){ /* create a plan, then excute it */ 233 switch (ndim){ 234 case 1: 235 #if defined(PETSC_USE_COMPLEX) 236 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 237 #endif 238 break; 239 case 2: 240 #if defined(PETSC_USE_COMPLEX) 241 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 242 #else 243 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 244 #endif 245 break; 246 case 3: 247 #if defined(PETSC_USE_COMPLEX) 248 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 249 #else 250 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 251 #endif 252 break; 253 default: 254 #if defined(PETSC_USE_COMPLEX) 255 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 256 #else 257 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 258 #endif 259 // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 260 break; 261 } 262 fftw->binarray = x_array; 263 fftw->boutarray = y_array; 264 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 265 } else { /* use existing plan */ 266 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 267 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 268 } else { 269 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 270 } 271 } 272 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 273 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatDestroy_FFTW" 279 PetscErrorCode MatDestroy_FFTW(Mat A) 280 { 281 Mat_FFT *fft = (Mat_FFT*)A->data; 282 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 283 PetscErrorCode ierr; 284 285 PetscFunctionBegin; 286 #if !defined(PETSC_USE_COMPLEX) 287 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 288 #endif 289 fftw_destroy_plan(fftw->p_forward); 290 fftw_destroy_plan(fftw->p_backward); 291 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 292 ierr = PetscFree(fft->data);CHKERRQ(ierr); 293 PetscFunctionReturn(0); 294 } 295 296 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 297 #undef __FUNCT__ 298 #define __FUNCT__ "VecDestroy_MPIFFTW" 299 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 300 { 301 PetscErrorCode ierr; 302 PetscScalar *array; 303 304 PetscFunctionBegin; 305 #if !defined(PETSC_USE_COMPLEX) 306 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 307 #endif 308 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 309 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 310 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 311 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatGetVecs1DC_FFTW" 317 /* 318 MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 319 parallel layout determined by FFTW-1D 320 321 */ 322 PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 323 { 324 PetscErrorCode ierr; 325 PetscMPIInt size,rank; 326 MPI_Comm comm=((PetscObject)A)->comm; 327 Mat_FFT *fft = (Mat_FFT*)A->data; 328 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 329 PetscInt N=fft->N; 330 PetscInt ndim=fft->ndim,*dim=fft->dim; 331 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 332 ptrdiff_t f_local_n1,f_local_1_end; 333 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 334 ptrdiff_t b_local_n1,b_local_1_end; 335 fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 336 337 PetscFunctionBegin; 338 #if !defined(PETSC_USE_COMPLEX) 339 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 340 #endif 341 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 342 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 343 if (size == 1){ 344 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 345 } 346 else { 347 if (ndim>1){ 348 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 349 else { 350 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 351 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 352 if (fin) { 353 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 354 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 355 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 356 } 357 if (fout) { 358 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 359 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 360 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 361 } 362 if (bin) { 363 data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 364 ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 365 (*bin)->ops->destroy = VecDestroy_MPIFFTW; 366 } 367 if (bout) { 368 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 369 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 370 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 371 } 372 } 373 if (fin){ 374 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 375 } 376 if (fout){ 377 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 378 } 379 if (bin){ 380 ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 381 } 382 if (bout){ 383 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 384 } 385 PetscFunctionReturn(0); 386 } 387 388 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatGetVecs_FFTW" 393 /* 394 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 395 parallel layout determined by FFTW 396 397 Collective on Mat 398 399 Input Parameter: 400 . mat - the matrix 401 402 Output Parameter: 403 + fin - (optional) input vector of forward FFTW 404 - fout - (optional) output vector of forward FFTW 405 406 Level: advanced 407 408 .seealso: MatCreateFFTW() 409 */ 410 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 411 { 412 PetscErrorCode ierr; 413 PetscMPIInt size,rank; 414 MPI_Comm comm=((PetscObject)A)->comm; 415 Mat_FFT *fft = (Mat_FFT*)A->data; 416 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 417 PetscInt N=fft->N, N1, n1,vsize; 418 419 PetscFunctionBegin; 420 //#if !defined(PETSC_USE_COMPLEX) 421 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 422 //#endif 423 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 424 PetscValidType(A,1); 425 426 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 427 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 428 if (size == 1){ /* sequential case */ 429 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 430 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 431 printf("The code successfully comes at the end of the routine with one processor\n"); 432 } else { /* mpi case */ 433 ptrdiff_t alloc_local,local_n0,local_0_start; 434 ptrdiff_t local_n1,local_1_end; 435 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 436 fftw_complex *data_fin,*data_fout; 437 double *data_finr ; 438 ptrdiff_t local_1_start,temp; 439 // PetscInt ctr; 440 // ptrdiff_t ndim1,*pdim; 441 // ndim1=(ptrdiff_t) ndim; 442 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 443 444 // for(ctr=0;ctr<ndim;ctr++) 445 // {k 446 // pdim[ctr] = dim[ctr]; 447 // } 448 449 450 451 switch (ndim){ 452 case 1: 453 /* Get local size */ 454 /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */ 455 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 456 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 457 if (fin) { 458 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 459 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 460 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 461 } 462 if (fout) { 463 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 464 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 465 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 466 } 467 break; 468 case 2: 469 #if !defined(PETSC_USE_COMPLEX) 470 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 471 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 472 if (fin) { 473 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 474 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 475 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 476 //printf("The code comes here with vector size %d\n",vsize); 477 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 478 } 479 if (fout) { 480 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 481 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 482 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 483 } 484 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 485 486 #else 487 /* Get local size */ 488 printf("Hope this does not come here"); 489 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 490 if (fin) { 491 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 492 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 493 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 494 } 495 if (fout) { 496 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 497 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 498 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 499 } 500 printf("Hope this does not come here"); 501 #endif 502 break; 503 case 3: 504 /* Get local size */ 505 #if !defined(PETSC_USE_COMPLEX) 506 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 507 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 508 if (fin) { 509 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 510 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 511 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 512 //printf("The code comes here with vector size %d\n",vsize); 513 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 514 } 515 if (fout) { 516 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 517 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 518 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 519 } 520 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 521 522 523 #else 524 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 525 // printf("The quantity n is %d",n); 526 if (fin) { 527 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 528 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 529 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 530 } 531 if (fout) { 532 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 533 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 534 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 535 } 536 #endif 537 break; 538 default: 539 /* Get local size */ 540 #if !defined(PETSC_USE_COMPLEX) 541 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 542 printf("The value of temp is %ld\n",temp); 543 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 544 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 545 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 546 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 547 if (fin) { 548 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 549 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 550 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 551 //printf("The code comes here with vector size %d\n",vsize); 552 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 553 } 554 if (fout) { 555 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 556 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 557 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 558 } 559 560 #else 561 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 562 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 563 // printf("The value of alloc local is %d",alloc_local); 564 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 565 // for(i=0;i<ndim;i++) 566 // { 567 // pdim[i]=dim[i];printf("%d",pdim[i]); 568 // } 569 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 570 // printf("The quantity n is %d",n); 571 if (fin) { 572 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 573 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 574 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 575 } 576 if (fout) { 577 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 578 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 579 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 580 } 581 #endif 582 break; 583 } 584 } 585 // if (fin){ 586 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 587 // } 588 // if (fout){ 589 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 590 // } 591 PetscFunctionReturn(0); 592 } 593 594 //EXTERN_C_BEGIN - Do we need this? 595 #undef __FUNCT__ 596 #define __FUNCT__ "InputTransformFFT" 597 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 598 { 599 PetscErrorCode ierr; 600 PetscFunctionBegin; 601 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604 //EXTERN_C_END - Do we need this? 605 /* 606 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 607 Input A, x, y 608 A - FFTW matrix 609 x - user data 610 Options Database Keys: 611 + -mat_fftw_plannerflags - set FFTW planner flags 612 613 Level: intermediate 614 615 */ 616 617 EXTERN_C_BEGIN 618 #undef __FUNCT__ 619 #define __FUNCT__ "InputTransformFFT_FTTW" 620 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 621 { 622 PetscErrorCode ierr; 623 MPI_Comm comm=((PetscObject)A)->comm; 624 Mat_FFT *fft = (Mat_FFT*)A->data; 625 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 626 PetscInt N=fft->N, N1, n1 ,NM; 627 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 628 PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 629 PetscInt i,j,k,rank,size; 630 ptrdiff_t alloc_local,local_n0,local_0_start; 631 ptrdiff_t local_n1,local_1_start; 632 VecScatter vecscat; 633 IS list1,list2; 634 635 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 636 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 637 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 638 printf("Local ownership starts at %d\n",low); 639 640 switch (ndim){ 641 case 1: 642 SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW"); 643 break; 644 case 2: 645 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 646 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 647 648 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 649 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 650 printf("Val local_0_start = %ld\n",local_0_start); 651 652 if (dim[1]%2==0) 653 NM = dim[1]+2; 654 else 655 NM = dim[1]+1; 656 657 for (i=0;i<local_n0;i++){ 658 for (j=0;j<dim[1];j++){ 659 tempindx = i*dim[1] + j; 660 tempindx1 = i*NM + j; 661 indx1[tempindx]=local_0_start*dim[1]+tempindx; 662 indx2[tempindx]=low+tempindx1; 663 // printf("Val tempindx1 = %d\n",tempindx1); 664 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 665 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 666 // printf("-------------------------\n",indx2[tempindx],rank); 667 } 668 } 669 670 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 671 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 672 673 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 674 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 675 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 676 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 677 break; 678 679 case 3: 680 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 681 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 682 683 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 684 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 685 printf("Val local_0_start = %ld\n",local_0_start); 686 687 if (dim[2]%2==0) 688 NM = dim[2]+2; 689 else 690 NM = dim[2]+1; 691 692 for (i=0;i<local_n0;i++){ 693 for (j=0;j<dim[1];j++){ 694 for (k=0;k<dim[2];k++){ 695 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 696 tempindx1 = i*dim[1]*NM + j*NM + k; 697 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 698 indx2[tempindx]=low+tempindx1; 699 } 700 // printf("Val tempindx1 = %d\n",tempindx1); 701 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 702 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 703 // printf("-------------------------\n",indx2[tempindx],rank); 704 } 705 } 706 707 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 708 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 709 710 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 711 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 712 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 713 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 714 break; 715 716 default: 717 SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 718 break; 719 } 720 721 return 0; 722 } 723 EXTERN_C_END 724 725 726 //EXTERN_C_BEGIN - Do we need this? 727 #undef __FUNCT__ 728 #define __FUNCT__ "OutputTransformFFT" 729 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 730 { 731 PetscErrorCode ierr; 732 PetscFunctionBegin; 733 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 734 PetscFunctionReturn(0); 735 } 736 //EXTERN_C_END - Do we need this? 737 /* 738 OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 739 Input A, x, y 740 A - FFTW matrix 741 x - FFTW vector 742 y - PETSc vector that user can read 743 Options Database Keys: 744 + -mat_fftw_plannerflags - set FFTW planner flags 745 746 Level: intermediate 747 748 */ 749 750 EXTERN_C_BEGIN 751 #undef __FUNCT__ 752 #define __FUNCT__ "OutputTransformFFT_FTTW" 753 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 754 { 755 PetscErrorCode ierr; 756 MPI_Comm comm=((PetscObject)A)->comm; 757 Mat_FFT *fft = (Mat_FFT*)A->data; 758 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 759 PetscInt N=fft->N, N1, n1 ,NM; 760 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 761 PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 762 PetscInt i,j,k,rank,size; 763 ptrdiff_t alloc_local,local_n0,local_0_start; 764 ptrdiff_t local_n1,local_1_start; 765 VecScatter vecscat; 766 IS list1,list2; 767 768 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 769 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 770 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 771 printf("Local ownership starts at %d\n",low); 772 773 switch (ndim){ 774 case 1: 775 SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW"); 776 break; 777 case 2: 778 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 779 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 780 781 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 782 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 783 printf("Val local_0_start = %ld\n",local_0_start); 784 785 if (dim[1]%2==0) 786 NM = dim[1]+2; 787 else 788 NM = dim[1]+1; 789 790 791 792 for (i=0;i<local_n0;i++){ 793 for (j=0;j<dim[1];j++){ 794 tempindx = i*dim[1] + j; 795 tempindx1 = i*NM + j; 796 indx1[tempindx]=local_0_start*dim[1]+tempindx; 797 indx2[tempindx]=low+tempindx1; 798 // printf("Val tempindx1 = %d\n",tempindx1); 799 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 800 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 801 // printf("-------------------------\n",indx2[tempindx],rank); 802 } 803 } 804 805 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 806 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 807 808 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 809 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 810 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 811 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 812 break; 813 814 case 3: 815 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 816 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 817 818 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 819 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 820 printf("Val local_0_start = %ld\n",local_0_start); 821 822 if (dim[2]%2==0) 823 NM = dim[2]+2; 824 else 825 NM = dim[2]+1; 826 827 for (i=0;i<local_n0;i++){ 828 for (j=0;j<dim[1];j++){ 829 for (k=0;k<dim[2];k++){ 830 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 831 tempindx1 = i*dim[1]*NM + j*NM + k; 832 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 833 indx2[tempindx]=low+tempindx1; 834 } 835 // printf("Val tempindx1 = %d\n",tempindx1); 836 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 837 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 838 // printf("-------------------------\n",indx2[tempindx],rank); 839 } 840 } 841 842 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 843 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 844 845 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 846 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 847 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 848 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 849 break; 850 851 default: 852 SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 853 break; 854 } 855 856 return 0; 857 } 858 EXTERN_C_END 859 860 861 862 863 EXTERN_C_BEGIN 864 #undef __FUNCT__ 865 #define __FUNCT__ "MatCreate_FFTW" 866 /* 867 MatCreate_FFTW - Creates a matrix object that provides FFT 868 via the external package FFTW 869 Options Database Keys: 870 + -mat_fftw_plannerflags - set FFTW planner flags 871 872 Level: intermediate 873 874 */ 875 876 PetscErrorCode MatCreate_FFTW(Mat A) 877 { 878 PetscErrorCode ierr; 879 MPI_Comm comm=((PetscObject)A)->comm; 880 Mat_FFT *fft=(Mat_FFT*)A->data; 881 Mat_FFTW *fftw; 882 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 883 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 884 PetscBool flg; 885 PetscInt p_flag,partial_dim=1,ctr,N1; 886 PetscMPIInt size,rank; 887 ptrdiff_t *pdim, temp; 888 ptrdiff_t local_n1,local_1_start; 889 890 PetscFunctionBegin; 891 //#if !defined(PETSC_USE_COMPLEX) 892 // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 893 //#endif 894 895 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 896 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 897 ierr = MPI_Barrier(PETSC_COMM_WORLD); 898 899 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 900 pdim[0] = dim[0]; 901 for(ctr=1;ctr<ndim;ctr++) 902 { 903 partial_dim *= dim[ctr]; 904 pdim[ctr] = dim[ctr]; 905 } 906 //#if !defined(PETSC_USE_COMPLEX) 907 // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 908 //#endif 909 910 // printf("partial dimension is %d",partial_dim); 911 if (size == 1) { 912 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 913 n = N; 914 } else { 915 ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 916 switch (ndim){ 917 case 1: 918 #if !defined(PETSC_USE_COMPLEX) 919 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 920 #endif 921 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 922 n = (PetscInt)local_n0; 923 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 924 // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 925 break; 926 case 2: 927 #if defined(PETSC_USE_COMPLEX) 928 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 929 /* 930 PetscMPIInt rank; 931 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 932 PetscSynchronizedFlush(comm); 933 */ 934 n = (PetscInt)local_n0*dim[1]; 935 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 936 #else 937 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 938 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 939 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 940 #endif 941 break; 942 case 3: 943 // printf("The value of alloc local is %d",alloc_local); 944 #if defined(PETSC_USE_COMPLEX) 945 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 946 n = (PetscInt)local_n0*dim[1]*dim[2]; 947 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 948 #else 949 printf("The code comes here\n"); 950 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 951 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 952 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 953 #endif 954 break; 955 default: 956 #if defined(PETSC_USE_COMPLEX) 957 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 958 // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 959 // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 960 n = (PetscInt)local_n0*partial_dim; 961 // printf("New partial dimension is %d %d %d",n,N,ndim); 962 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 963 #else 964 temp = pdim[ndim-1]; 965 pdim[ndim-1]= temp/2 + 1; 966 printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 967 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 968 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 969 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 970 pdim[ndim-1] = temp; 971 printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 972 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 973 #endif 974 break; 975 } 976 } 977 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 978 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 979 fft->data = (void*)fftw; 980 981 fft->n = n; 982 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 983 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 984 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 985 986 fftw->p_forward = 0; 987 fftw->p_backward = 0; 988 fftw->p_flag = FFTW_ESTIMATE; 989 990 if (size == 1){ 991 A->ops->mult = MatMult_SeqFFTW; 992 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 993 } else { 994 A->ops->mult = MatMult_MPIFFTW; 995 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 996 } 997 fft->matdestroy = MatDestroy_FFTW; 998 A->ops->getvecs = MatGetVecs_FFTW; 999 A->assembled = PETSC_TRUE; 1000 #if !defined(PETSC_USE_COMPLEX) 1001 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 1002 PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1003 #endif 1004 1005 /* get runtime options */ 1006 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1007 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1008 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1009 PetscOptionsEnd(); 1010 PetscFunctionReturn(0); 1011 } 1012 EXTERN_C_END 1013 1014 1015 1016 1017