1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 fftw_plan p_forward,p_backward; 15 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 16 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 17 executed for the arrays with which the plan was created */ 18 } Mat_FFTW; 19 20 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 21 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatDestroy_FFTW(Mat); 25 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 26 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatMult_SeqFFTW" 30 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 31 { 32 PetscErrorCode ierr; 33 Mat_FFT *fft = (Mat_FFT*)A->data; 34 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 35 PetscScalar *x_array,*y_array; 36 PetscInt ndim=fft->ndim,*dim=fft->dim; 37 38 PetscFunctionBegin; 39 #if !defined(PETSC_USE_COMPLEX) 40 41 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 42 #endif 43 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 44 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 45 if (!fftw->p_forward){ /* create a plan, then excute it */ 46 switch (ndim){ 47 case 1: 48 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 49 break; 50 case 2: 51 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 52 break; 53 case 3: 54 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 55 break; 56 default: 57 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 58 break; 59 } 60 fftw->finarray = x_array; 61 fftw->foutarray = y_array; 62 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 63 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 64 fftw_execute(fftw->p_forward); 65 } else { /* use existing plan */ 66 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 67 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 68 } else { 69 fftw_execute(fftw->p_forward); 70 } 71 } 72 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 73 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 79 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 80 { 81 PetscErrorCode ierr; 82 Mat_FFT *fft = (Mat_FFT*)A->data; 83 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 84 PetscScalar *x_array,*y_array; 85 PetscInt ndim=fft->ndim,*dim=fft->dim; 86 87 PetscFunctionBegin; 88 #if !defined(PETSC_USE_COMPLEX) 89 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 90 #endif 91 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 92 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 93 if (!fftw->p_backward){ /* create a plan, then excute it */ 94 switch (ndim){ 95 case 1: 96 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 97 break; 98 case 2: 99 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 100 break; 101 case 3: 102 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 103 break; 104 default: 105 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 106 break; 107 } 108 fftw->binarray = x_array; 109 fftw->boutarray = y_array; 110 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 111 } else { /* use existing plan */ 112 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 113 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 114 } else { 115 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 116 } 117 } 118 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 119 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatMult_MPIFFTW" 125 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 126 { 127 PetscErrorCode ierr; 128 Mat_FFT *fft = (Mat_FFT*)A->data; 129 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 130 PetscScalar *x_array,*y_array; 131 PetscInt ndim=fft->ndim,*dim=fft->dim; 132 MPI_Comm comm=((PetscObject)A)->comm; 133 // PetscInt ctr; 134 // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 135 // ndim1=(ptrdiff_t) ndim; 136 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 137 138 // for(ctr=0;ctr<ndim;ctr++) 139 // { 140 // pdim[ctr] = dim[ctr]; 141 // } 142 143 PetscFunctionBegin; 144 #if !defined(PETSC_USE_COMPLEX) 145 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 146 #endif 147 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 148 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 149 150 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 151 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 152 if (!fftw->p_forward){ /* create a plan, then excute it */ 153 switch (ndim){ 154 case 1: 155 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 156 break; 157 case 2: 158 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 159 break; 160 case 3: 161 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 162 break; 163 default: 164 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 165 // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 166 break; 167 } 168 fftw->finarray = x_array; 169 fftw->foutarray = y_array; 170 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 171 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 172 fftw_execute(fftw->p_forward); 173 } else { /* use existing plan */ 174 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 175 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 176 } else { 177 fftw_execute(fftw->p_forward); 178 } 179 } 180 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 181 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 187 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 188 { 189 PetscErrorCode ierr; 190 Mat_FFT *fft = (Mat_FFT*)A->data; 191 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 192 PetscScalar *x_array,*y_array; 193 PetscInt ndim=fft->ndim,*dim=fft->dim; 194 MPI_Comm comm=((PetscObject)A)->comm; 195 // PetscInt ctr; 196 // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 197 // ndim1=(ptrdiff_t) ndim; 198 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 199 200 // for(ctr=0;ctr<ndim;ctr++) 201 // { 202 // pdim[ctr] = dim[ctr]; 203 // } 204 205 PetscFunctionBegin; 206 #if !defined(PETSC_USE_COMPLEX) 207 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 208 #endif 209 // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 210 // should pdim be a member of Mat_FFTW? 211 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 212 213 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 214 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 215 if (!fftw->p_backward){ /* create a plan, then excute it */ 216 switch (ndim){ 217 case 1: 218 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 219 break; 220 case 2: 221 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 222 break; 223 case 3: 224 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 225 break; 226 default: 227 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 228 // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 229 break; 230 } 231 fftw->binarray = x_array; 232 fftw->boutarray = y_array; 233 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 234 } else { /* use existing plan */ 235 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 236 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 237 } else { 238 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 239 } 240 } 241 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 242 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatDestroy_FFTW" 248 PetscErrorCode MatDestroy_FFTW(Mat A) 249 { 250 Mat_FFT *fft = (Mat_FFT*)A->data; 251 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 #if !defined(PETSC_USE_COMPLEX) 256 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 257 #endif 258 fftw_destroy_plan(fftw->p_forward); 259 fftw_destroy_plan(fftw->p_backward); 260 ierr = PetscFree(fft->data);CHKERRQ(ierr); 261 PetscFunctionReturn(0); 262 } 263 264 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 265 #undef __FUNCT__ 266 #define __FUNCT__ "VecDestroy_MPIFFTW" 267 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 268 { 269 PetscErrorCode ierr; 270 PetscScalar *array; 271 272 PetscFunctionBegin; 273 #if !defined(PETSC_USE_COMPLEX) 274 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 275 #endif 276 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 277 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 278 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 279 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatGetVecs_FFTW1D" 285 /* 286 MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 287 parallel layout determined by FFTW-1D 288 289 */ 290 PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 291 { 292 PetscErrorCode ierr; 293 PetscMPIInt size,rank; 294 MPI_Comm comm=((PetscObject)A)->comm; 295 Mat_FFT *fft = (Mat_FFT*)A->data; 296 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 297 PetscInt N=fft->N; 298 PetscInt ndim=fft->ndim,*dim=fft->dim; 299 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 300 ptrdiff_t f_local_n1,f_local_1_end; 301 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 302 ptrdiff_t b_local_n1,b_local_1_end; 303 fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 304 305 PetscFunctionBegin; 306 #if !defined(PETSC_USE_COMPLEX) 307 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 308 #endif 309 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 310 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 311 if (size == 1){ 312 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 313 } 314 else { 315 if (ndim>1){ 316 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 317 else { 318 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 319 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 320 if (fin) { 321 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 322 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 323 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 324 } 325 if (fout) { 326 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 327 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 328 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 329 } 330 if (bin) { 331 data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 332 ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 333 (*bin)->ops->destroy = VecDestroy_MPIFFTW; 334 } 335 if (bout) { 336 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 337 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 338 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 339 } 340 } 341 if (fin){ 342 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 343 } 344 if (fout){ 345 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 346 } 347 if (bin){ 348 ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 349 } 350 if (bout){ 351 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 352 } 353 PetscFunctionReturn(0); 354 } 355 356 357 358 359 360 361 362 } 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatGetVecs_FFTW" 365 /* 366 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 367 parallel layout determined by FFTW 368 369 Collective on Mat 370 371 Input Parameter: 372 . mat - the matrix 373 374 Output Parameter: 375 + fin - (optional) input vector of forward FFTW 376 - fout - (optional) output vector of forward FFTW 377 378 Level: advanced 379 380 .seealso: MatCreateFFTW() 381 */ 382 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 383 { 384 PetscErrorCode ierr; 385 PetscMPIInt size,rank; 386 MPI_Comm comm=((PetscObject)A)->comm; 387 Mat_FFT *fft = (Mat_FFT*)A->data; 388 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 389 PetscInt N=fft->N; 390 391 PetscFunctionBegin; 392 #if !defined(PETSC_USE_COMPLEX) 393 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 394 #endif 395 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 396 PetscValidType(A,1); 397 398 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 399 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 400 if (size == 1){ /* sequential case */ 401 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 402 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 403 } else { /* mpi case */ 404 ptrdiff_t alloc_local,local_n0,local_0_start; 405 ptrdiff_t local_n1,local_1_end; 406 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 407 fftw_complex *data_fin,*data_fout; 408 // PetscInt ctr; 409 // ptrdiff_t ndim1,*pdim; 410 // ndim1=(ptrdiff_t) ndim; 411 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 412 413 // for(ctr=0;ctr<ndim;ctr++) 414 // { 415 // pdim[ctr] = dim[ctr]; 416 // } 417 418 switch (ndim){ 419 case 1: 420 /* Get local size */ 421 /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */ 422 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 423 424 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 425 if (fin) { 426 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 427 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 428 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 429 } 430 if (fout) { 431 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 432 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 433 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 434 } 435 break; 436 case 2: 437 /* Get local size */ 438 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 439 if (fin) { 440 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 441 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 442 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 443 } 444 if (fout) { 445 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 446 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 447 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 448 } 449 break; 450 case 3: 451 /* Get local size */ 452 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 453 // printf("The quantity n is %d",n); 454 if (fin) { 455 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 456 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 457 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 458 } 459 if (fout) { 460 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 461 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 462 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 463 } 464 break; 465 default: 466 /* Get local size */ 467 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 468 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 469 // printf("The value of alloc local is %d",alloc_local); 470 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 471 // for(i=0;i<ndim;i++) 472 // { 473 // pdim[i]=dim[i];printf("%d",pdim[i]); 474 // } 475 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 476 // printf("The quantity n is %d",n); 477 if (fin) { 478 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 479 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 480 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 481 } 482 if (fout) { 483 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 484 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 485 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 486 } 487 break; 488 } 489 } 490 if (fin){ 491 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 492 } 493 if (fout){ 494 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 495 } 496 PetscFunctionReturn(0); 497 } 498 499 EXTERN_C_BEGIN 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatCreate_FFTW" 502 /* 503 MatCreate_FFTW - Creates a matrix object that provides FFT 504 via the external package FFTW 505 506 Options Database Keys: 507 + -mat_fftw_plannerflags - set FFTW planner flags 508 509 Level: intermediate 510 511 */ 512 PetscErrorCode MatCreate_FFTW(Mat A) 513 { 514 PetscErrorCode ierr; 515 MPI_Comm comm=((PetscObject)A)->comm; 516 Mat_FFT *fft=(Mat_FFT*)A->data; 517 Mat_FFTW *fftw; 518 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 519 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 520 PetscBool flg; 521 PetscInt p_flag,partial_dim=1,ctr; 522 PetscMPIInt size,rank; 523 ptrdiff_t *pdim; 524 525 PetscFunctionBegin; 526 #if !defined(PETSC_USE_COMPLEX) 527 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 528 #endif 529 530 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 531 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 532 533 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 534 pdim[0] = dim[0]; 535 for(ctr=1;ctr<ndim;ctr++) 536 { 537 partial_dim*= dim[ctr]; 538 pdim[ctr] = dim[ctr]; 539 } 540 // printf("partial dimension is %d",partial_dim); 541 if (size == 1) { 542 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 543 n = N; 544 } else { 545 ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 546 switch (ndim){ 547 case 1: 548 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 549 n = (PetscInt)local_n0; 550 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 551 // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs_FFTW1D_C","MatGetVecs_FFTW1D",MatGetVecs_FFTW1D); 552 break; 553 case 2: 554 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 555 /* 556 PetscMPIInt rank; 557 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 558 PetscSynchronizedFlush(comm); 559 */ 560 n = (PetscInt)local_n0*dim[1]; 561 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 562 break; 563 case 3: 564 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 565 // printf("The value of alloc local is %d",alloc_local); 566 n = (PetscInt)local_n0*dim[1]*dim[2]; 567 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 568 break; 569 default: 570 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 571 // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 572 // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 573 n = (PetscInt)local_n0*partial_dim; 574 // printf("New partial dimension is %d %d %d",n,N,ndim); 575 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 576 break; 577 } 578 } 579 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 580 581 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 582 fft->data = (void*)fftw; 583 584 fft->n = n; 585 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 586 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 587 // (fftw->dim_fftw)=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 588 for(ctr=0;ctr<ndim;ctr++) 589 { 590 (fftw->dim_fftw)[ctr]=dim[ctr]; 591 } 592 593 fftw->p_forward = 0; 594 fftw->p_backward = 0; 595 fftw->p_flag = FFTW_ESTIMATE; 596 597 if (size == 1){ 598 A->ops->mult = MatMult_SeqFFTW; 599 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 600 } else { 601 A->ops->mult = MatMult_MPIFFTW; 602 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 603 } 604 fft->matdestroy = MatDestroy_FFTW; 605 A->ops->getvecs = MatGetVecs_FFTW; 606 A->assembled = PETSC_TRUE; 607 608 /* get runtime options */ 609 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 610 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 611 if (flg) {fftw->p_flag = (unsigned)p_flag;} 612 PetscOptionsEnd(); 613 PetscFunctionReturn(0); 614 } 615 EXTERN_C_END 616