1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 28 29 #undef __FUNCT__ 30 #define __FUNCT__ "MatMult_SeqFFTW" 31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32 { 33 PetscErrorCode ierr; 34 Mat_FFT *fft = (Mat_FFT*)A->data; 35 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36 PetscScalar *x_array,*y_array; 37 PetscInt ndim=fft->ndim,*dim=fft->dim; 38 39 PetscFunctionBegin; 40 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 41 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 42 if (!fftw->p_forward){ /* create a plan, then excute it */ 43 switch (ndim){ 44 case 1: 45 #if defined(PETSC_USE_COMPLEX) 46 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 47 #else 48 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 49 #endif 50 break; 51 case 2: 52 #if defined(PETSC_USE_COMPLEX) 53 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54 #else 55 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 56 #endif 57 break; 58 case 3: 59 #if defined(PETSC_USE_COMPLEX) 60 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 61 #else 62 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 63 #endif 64 break; 65 default: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 } 73 fftw->finarray = x_array; 74 fftw->foutarray = y_array; 75 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 76 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 77 fftw_execute(fftw->p_forward); 78 } else { /* use existing plan */ 79 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 80 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 81 } else { 82 fftw_execute(fftw->p_forward); 83 } 84 } 85 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 86 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 92 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 93 { 94 PetscErrorCode ierr; 95 Mat_FFT *fft = (Mat_FFT*)A->data; 96 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 97 PetscScalar *x_array,*y_array; 98 PetscInt ndim=fft->ndim,*dim=fft->dim; 99 100 PetscFunctionBegin; 101 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 102 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 103 if (!fftw->p_backward){ /* create a plan, then excute it */ 104 switch (ndim){ 105 case 1: 106 #if defined(PETSC_USE_COMPLEX) 107 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 108 #else 109 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 110 #endif 111 break; 112 case 2: 113 #if defined(PETSC_USE_COMPLEX) 114 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 115 #else 116 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 117 #endif 118 break; 119 case 3: 120 #if defined(PETSC_USE_COMPLEX) 121 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 122 #else 123 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 124 #endif 125 break; 126 default: 127 #if defined(PETSC_USE_COMPLEX) 128 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 129 #else 130 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 131 #endif 132 break; 133 } 134 fftw->binarray = x_array; 135 fftw->boutarray = y_array; 136 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 137 } else { /* use existing plan */ 138 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 139 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 140 } else { 141 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 142 } 143 } 144 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 145 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatMult_MPIFFTW" 151 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 152 { 153 PetscErrorCode ierr; 154 Mat_FFT *fft = (Mat_FFT*)A->data; 155 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 156 PetscScalar *x_array,*y_array; 157 PetscInt ndim=fft->ndim,*dim=fft->dim; 158 MPI_Comm comm=((PetscObject)A)->comm; 159 160 PetscFunctionBegin; 161 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 162 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163 if (!fftw->p_forward){ /* create a plan, then excute it */ 164 switch (ndim){ 165 case 1: 166 #if defined(PETSC_USE_COMPLEX) 167 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 168 #else 169 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 170 #endif 171 break; 172 case 2: 173 #if defined(PETSC_USE_COMPLEX) 174 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 175 #else 176 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 177 #endif 178 break; 179 case 3: 180 #if defined(PETSC_USE_COMPLEX) 181 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 182 #else 183 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 184 #endif 185 break; 186 default: 187 #if defined(PETSC_USE_COMPLEX) 188 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 189 #else 190 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 191 #endif 192 break; 193 } 194 fftw->finarray = x_array; 195 fftw->foutarray = y_array; 196 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 197 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 198 fftw_execute(fftw->p_forward); 199 } else { /* use existing plan */ 200 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 201 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202 } else { 203 fftw_execute(fftw->p_forward); 204 } 205 } 206 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 213 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 214 { 215 PetscErrorCode ierr; 216 Mat_FFT *fft = (Mat_FFT*)A->data; 217 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 218 PetscScalar *x_array,*y_array; 219 PetscInt ndim=fft->ndim,*dim=fft->dim; 220 MPI_Comm comm=((PetscObject)A)->comm; 221 222 PetscFunctionBegin; 223 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 224 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 225 if (!fftw->p_backward){ /* create a plan, then excute it */ 226 switch (ndim){ 227 case 1: 228 #if defined(PETSC_USE_COMPLEX) 229 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 230 #else 231 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 232 #endif 233 break; 234 case 2: 235 #if defined(PETSC_USE_COMPLEX) 236 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 237 #else 238 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 239 #endif 240 break; 241 case 3: 242 #if defined(PETSC_USE_COMPLEX) 243 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 244 #else 245 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 246 #endif 247 break; 248 default: 249 #if defined(PETSC_USE_COMPLEX) 250 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 251 #else 252 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 253 #endif 254 break; 255 } 256 fftw->binarray = x_array; 257 fftw->boutarray = y_array; 258 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 259 } else { /* use existing plan */ 260 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 261 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 262 } else { 263 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264 } 265 } 266 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 267 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatDestroy_FFTW" 273 PetscErrorCode MatDestroy_FFTW(Mat A) 274 { 275 Mat_FFT *fft = (Mat_FFT*)A->data; 276 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 fftw_destroy_plan(fftw->p_forward); 281 fftw_destroy_plan(fftw->p_backward); 282 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 283 ierr = PetscFree(fft->data);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 288 #undef __FUNCT__ 289 #define __FUNCT__ "VecDestroy_MPIFFTW" 290 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 291 { 292 PetscErrorCode ierr; 293 PetscScalar *array; 294 295 PetscFunctionBegin; 296 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 297 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 298 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 299 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatGetVecs1DC_FFTW" 305 /* 306 MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 307 parallel layout determined by FFTW-1D 308 309 */ 310 PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout) 311 { 312 PetscErrorCode ierr; 313 PetscMPIInt size,rank; 314 MPI_Comm comm=((PetscObject)A)->comm; 315 Mat_FFT *fft = (Mat_FFT*)A->data; 316 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 317 PetscInt N=fft->N; 318 PetscInt ndim=fft->ndim,*dim=fft->dim; 319 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 320 ptrdiff_t f_local_n1,f_local_1_end; 321 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 322 ptrdiff_t b_local_n1,b_local_1_end; 323 fftw_complex *data_fin,*data_fout,*data_bout; 324 325 PetscFunctionBegin; 326 #if !defined(PETSC_USE_COMPLEX) 327 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 328 #endif 329 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 330 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 331 if (size == 1){ 332 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 333 } 334 else { 335 if (ndim>1){ 336 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 337 else { 338 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 339 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 340 if (fin) { 341 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 342 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 343 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 344 } 345 if (fout) { 346 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 347 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 348 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 349 } 350 if (bout) { 351 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 352 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 353 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 354 } 355 } 356 if (fin){ 357 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 358 } 359 if (fout){ 360 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 361 } 362 if (bout){ 363 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 364 } 365 PetscFunctionReturn(0); 366 } 367 368 369 } 370 371 #undef __FUNCT__ 372 #define __FUNCT__ "MatGetVecs_FFTW" 373 /* 374 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 375 parallel layout determined by FFTW 376 377 Collective on Mat 378 379 Input Parameter: 380 . mat - the matrix 381 382 Output Parameter: 383 + fin - (optional) input vector of forward FFTW 384 - fout - (optional) output vector of forward FFTW 385 386 Level: advanced 387 388 .seealso: MatCreateFFTW() 389 */ 390 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 391 { 392 PetscErrorCode ierr; 393 PetscMPIInt size,rank; 394 MPI_Comm comm=((PetscObject)A)->comm; 395 Mat_FFT *fft = (Mat_FFT*)A->data; 396 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 397 PetscInt N=fft->N, N1, n1,vsize; 398 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 399 400 PetscFunctionBegin; 401 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 402 PetscValidType(A,1); 403 404 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 405 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 406 if (size == 1){ /* sequential case */ 407 #if defined(PETSC_USE_COMPLEX) 408 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 409 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 410 #else 411 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 412 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 413 #endif 414 } else { /* mpi case */ 415 ptrdiff_t alloc_local,local_n0,local_0_start; 416 ptrdiff_t local_n1,local_1_end; 417 fftw_complex *data_fin,*data_fout; 418 double *data_finr ; 419 ptrdiff_t local_1_start,temp; 420 // PetscInt ctr; 421 // ptrdiff_t ndim1,*pdim; 422 // ndim1=(ptrdiff_t) ndim; 423 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 424 425 // for(ctr=0;ctr<ndim;ctr++) 426 // {k 427 // pdim[ctr] = dim[ctr]; 428 // } 429 430 431 432 switch (ndim){ 433 case 1: 434 /* Get local size */ 435 /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 436 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 437 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 438 if (fin) { 439 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 440 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 441 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 442 } 443 if (fout) { 444 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 445 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 446 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 447 } 448 break; 449 case 2: 450 #if !defined(PETSC_USE_COMPLEX) 451 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 452 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 453 if (fin) { 454 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 455 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 456 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 457 //printf("The code comes here with vector size %d\n",vsize); 458 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 459 } 460 if (fout) { 461 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 462 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 463 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 464 } 465 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 466 467 #else 468 /* Get local size */ 469 //printf("Hope this does not come here"); 470 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 471 if (fin) { 472 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 473 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 474 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 475 } 476 if (fout) { 477 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 478 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 479 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 480 } 481 //printf("Hope this does not come here"); 482 #endif 483 break; 484 case 3: 485 /* Get local size */ 486 #if !defined(PETSC_USE_COMPLEX) 487 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 488 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 489 if (fin) { 490 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 491 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 492 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 493 //printf("The code comes here with vector size %d\n",vsize); 494 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 495 } 496 if (fout) { 497 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 498 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 499 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 500 } 501 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 502 503 504 #else 505 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 506 // printf("The quantity n is %d",n); 507 if (fin) { 508 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 509 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 510 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 511 } 512 if (fout) { 513 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 514 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 515 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 516 } 517 #endif 518 break; 519 default: 520 /* Get local size */ 521 #if !defined(PETSC_USE_COMPLEX) 522 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 523 printf("The value of temp is %ld\n",temp); 524 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 525 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 526 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 527 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 528 if (fin) { 529 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 530 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 531 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 532 //printf("The code comes here with vector size %d\n",vsize); 533 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 534 } 535 if (fout) { 536 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 537 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 538 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 539 } 540 541 #else 542 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 543 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 544 // printf("The value of alloc local is %d",alloc_local); 545 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 546 // for(i=0;i<ndim;i++) 547 // { 548 // pdim[i]=dim[i];printf("%d",pdim[i]); 549 // } 550 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 551 // printf("The quantity n is %d",n); 552 if (fin) { 553 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 554 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 555 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 556 } 557 if (fout) { 558 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 559 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 560 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 561 } 562 #endif 563 break; 564 } 565 } 566 // if (fin){ 567 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 568 // } 569 // if (fout){ 570 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 571 // } 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "InputTransformFFT" 577 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 578 { 579 PetscErrorCode ierr; 580 PetscFunctionBegin; 581 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 582 PetscFunctionReturn(0); 583 } 584 585 /* 586 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 587 Input A, x, y 588 A - FFTW matrix 589 x - user data 590 Options Database Keys: 591 + -mat_fftw_plannerflags - set FFTW planner flags 592 593 Level: intermediate 594 595 */ 596 597 EXTERN_C_BEGIN 598 #undef __FUNCT__ 599 #define __FUNCT__ "InputTransformFFT_FTTW" 600 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 601 { 602 PetscErrorCode ierr; 603 MPI_Comm comm=((PetscObject)A)->comm; 604 Mat_FFT *fft = (Mat_FFT*)A->data; 605 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 606 PetscInt N=fft->N, N1, n1 ,NM; 607 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 608 PetscInt low, *indx1, *indx2, tempindx, tempindx1; 609 PetscInt i,j,k,rank,size,partial_dim; 610 ptrdiff_t alloc_local,local_n0,local_0_start; 611 ptrdiff_t local_n1,local_1_start,temp; 612 VecScatter vecscat; 613 IS list1,list2; 614 615 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 616 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 617 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 618 printf("Local ownership starts at %d\n",low); 619 620 if (size==1) 621 { 622 /* switch (ndim){ 623 case 1: 624 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 625 for (i=0;i<dim[0];i++) 626 { 627 indx1[i] = i; 628 } 629 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 630 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 631 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 632 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 633 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 634 ierr = ISDestroy(&list1);CHKERRQ(ierr); 635 ierr = PetscFree(indx1);CHKERRQ(ierr); 636 break; 637 638 case 2: 639 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 640 for (i=0;i<dim[0];i++){ 641 for (j=0;j<dim[1];j++){ 642 indx1[i*dim[1]+j] = i*dim[1] + j; 643 } 644 } 645 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 646 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 647 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 648 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 649 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 650 ierr = ISDestroy(&list1);CHKERRQ(ierr); 651 ierr = PetscFree(indx1);CHKERRQ(ierr); 652 break; 653 case 3: 654 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 655 for (i=0;i<dim[0];i++){ 656 for (j=0;j<dim[1];j++){ 657 for (k=0;k<dim[2];k++){ 658 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 659 } 660 } 661 } 662 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 663 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 664 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 665 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 666 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 667 ierr = ISDestroy(&list1);CHKERRQ(ierr); 668 ierr = PetscFree(indx1);CHKERRQ(ierr); 669 break; 670 default: 671 */ 672 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1); 673 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 674 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 675 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 676 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 677 ierr = ISDestroy(&list1);CHKERRQ(ierr); 678 //ierr = ISDestroy(list1);CHKERRQ(ierr); 679 // break; 680 // } 681 } 682 683 else{ 684 685 switch (ndim){ 686 case 1: 687 #if defined(PETSC_USE_COMPLEX) 688 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 689 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 690 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 691 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 692 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 693 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 694 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 695 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 696 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 697 ierr = ISDestroy(&list1);CHKERRQ(ierr); 698 ierr = ISDestroy(&list2);CHKERRQ(ierr); 699 break; 700 #else 701 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 702 #endif 703 break; 704 case 2: 705 #if defined(PETSC_USE_COMPLEX) 706 //PetscInt my_value; 707 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 708 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 709 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 710 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 711 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 712 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 713 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 714 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 715 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 716 ierr = ISDestroy(&list1);CHKERRQ(ierr); 717 ierr = ISDestroy(&list2);CHKERRQ(ierr); 718 break; 719 #else 720 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 721 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 722 723 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 724 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 725 printf("Val local_0_start = %ld\n",local_0_start); 726 727 if (dim[1]%2==0) 728 NM = dim[1]+2; 729 else 730 NM = dim[1]+1; 731 732 for (i=0;i<local_n0;i++){ 733 for (j=0;j<dim[1];j++){ 734 tempindx = i*dim[1] + j; 735 tempindx1 = i*NM + j; 736 indx1[tempindx]=local_0_start*dim[1]+tempindx; 737 indx2[tempindx]=low+tempindx1; 738 // printf("Val tempindx1 = %d\n",tempindx1); 739 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 740 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 741 // printf("-------------------------\n",indx2[tempindx],rank); 742 } 743 } 744 745 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 746 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 747 748 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 749 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 750 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 751 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 752 ierr = ISDestroy(&list1);CHKERRQ(ierr); 753 ierr = ISDestroy(&list2);CHKERRQ(ierr); 754 ierr = PetscFree(indx1);CHKERRQ(ierr); 755 ierr = PetscFree(indx2);CHKERRQ(ierr); 756 break; 757 #endif 758 759 case 3: 760 #if defined(PETSC_USE_COMPLEX) 761 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 762 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 763 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 764 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 765 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 766 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 767 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 768 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 769 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 770 ierr = ISDestroy(&list1);CHKERRQ(ierr); 771 ierr = ISDestroy(&list2);CHKERRQ(ierr); 772 break; 773 #else 774 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 775 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 776 777 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 778 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 779 printf("Val local_0_start = %ld\n",local_0_start); 780 781 if (dim[2]%2==0) 782 NM = dim[2]+2; 783 else 784 NM = dim[2]+1; 785 786 for (i=0;i<local_n0;i++){ 787 for (j=0;j<dim[1];j++){ 788 for (k=0;k<dim[2];k++){ 789 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 790 tempindx1 = i*dim[1]*NM + j*NM + k; 791 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 792 indx2[tempindx]=low+tempindx1; 793 } 794 // printf("Val tempindx1 = %d\n",tempindx1); 795 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 796 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 797 // printf("-------------------------\n",indx2[tempindx],rank); 798 } 799 } 800 801 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 802 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 803 804 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 805 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 808 ierr = ISDestroy(&list1);CHKERRQ(ierr); 809 ierr = ISDestroy(&list2);CHKERRQ(ierr); 810 ierr = PetscFree(indx1);CHKERRQ(ierr); 811 ierr = PetscFree(indx2);CHKERRQ(ierr); 812 break; 813 #endif 814 815 default: 816 #if defined(PETSC_USE_COMPLEX) 817 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 818 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 819 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 820 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 821 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 822 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 823 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 826 ierr = ISDestroy(&list1);CHKERRQ(ierr); 827 ierr = ISDestroy(&list2);CHKERRQ(ierr); 828 829 830 #else 831 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 832 printf("The value of temp is %ld\n",temp); 833 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 834 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 835 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 836 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 837 838 partial_dim = fftw->partial_dim; 839 printf("The value of partial dim is %d\n",partial_dim); 840 841 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 842 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 843 printf("Val local_0_start = %ld\n",local_0_start); 844 845 if (dim[ndim-1]%2==0) 846 NM = 2; 847 else 848 NM = 1; 849 850 j = low; 851 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 852 { 853 indx1[i] = local_0_start*partial_dim + i; 854 indx2[i] = j; 855 //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 856 if (k%dim[ndim-1]==0) 857 { j+=NM;} 858 j++; 859 } 860 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 861 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 862 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 863 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 864 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 866 ierr = ISDestroy(&list1);CHKERRQ(ierr); 867 ierr = ISDestroy(&list2);CHKERRQ(ierr); 868 ierr = PetscFree(indx1);CHKERRQ(ierr); 869 ierr = PetscFree(indx2);CHKERRQ(ierr); 870 break; 871 #endif 872 } 873 } 874 875 return 0; 876 } 877 EXTERN_C_END 878 879 #undef __FUNCT__ 880 #define __FUNCT__ "OutputTransformFFT" 881 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 882 { 883 PetscErrorCode ierr; 884 PetscFunctionBegin; 885 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 886 PetscFunctionReturn(0); 887 } 888 889 /* 890 OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 891 Input A, x, y 892 A - FFTW matrix 893 x - FFTW vector 894 y - PETSc vector that user can read 895 Options Database Keys: 896 + -mat_fftw_plannerflags - set FFTW planner flags 897 898 Level: intermediate 899 900 */ 901 902 EXTERN_C_BEGIN 903 #undef __FUNCT__ 904 #define __FUNCT__ "OutputTransformFFT_FTTW" 905 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 906 { 907 PetscErrorCode ierr; 908 MPI_Comm comm=((PetscObject)A)->comm; 909 Mat_FFT *fft = (Mat_FFT*)A->data; 910 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 911 PetscInt N=fft->N, N1, n1 ,NM; 912 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 913 PetscInt low, *indx1, *indx2, tempindx, tempindx1; 914 PetscInt i,j,k,rank,size,partial_dim; 915 ptrdiff_t alloc_local,local_n0,local_0_start; 916 ptrdiff_t local_n1,local_1_start,temp; 917 VecScatter vecscat; 918 IS list1,list2; 919 920 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 921 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 922 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 923 924 if (size==1){ 925 /* 926 switch (ndim){ 927 case 1: 928 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 929 for (i=0;i<dim[0];i++) 930 { 931 indx1[i] = i; 932 } 933 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 934 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 935 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 937 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 938 ierr = ISDestroy(&list1);CHKERRQ(ierr); 939 ierr = PetscFree(indx1);CHKERRQ(ierr); 940 break; 941 942 case 2: 943 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 944 for (i=0;i<dim[0];i++){ 945 for (j=0;j<dim[1];j++){ 946 indx1[i*dim[1]+j] = i*dim[1] + j; 947 } 948 } 949 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 950 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 951 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 952 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 953 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 954 ierr = ISDestroy(&list1);CHKERRQ(ierr); 955 ierr = PetscFree(indx1);CHKERRQ(ierr); 956 break; 957 case 3: 958 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 959 for (i=0;i<dim[0];i++){ 960 for (j=0;j<dim[1];j++){ 961 for (k=0;k<dim[2];k++){ 962 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 963 } 964 } 965 } 966 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 967 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 968 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 969 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 970 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 971 ierr = ISDestroy(&list1);CHKERRQ(ierr); 972 ierr = PetscFree(indx1);CHKERRQ(ierr); 973 break; 974 default: 975 */ 976 ierr = ISCreateStride(comm,N,0,1,&list1); 977 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 978 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 979 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 980 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 982 ierr = ISDestroy(&list1);CHKERRQ(ierr); 983 // break; 984 // } 985 } 986 else{ 987 988 switch (ndim){ 989 case 1: 990 #if defined(PETSC_USE_COMPLEX) 991 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 992 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 993 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 994 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 995 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 996 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 997 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1000 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1001 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1002 break; 1003 1004 #else 1005 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 1006 #endif 1007 break; 1008 case 2: 1009 #if defined(PETSC_USE_COMPLEX) 1010 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1011 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1012 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1013 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1014 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1015 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1016 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1017 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1018 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1019 break; 1020 #else 1021 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1022 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 1023 1024 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1025 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1026 printf("Val local_0_start = %ld\n",local_0_start); 1027 1028 if (dim[1]%2==0) 1029 NM = dim[1]+2; 1030 else 1031 NM = dim[1]+1; 1032 1033 1034 1035 for (i=0;i<local_n0;i++){ 1036 for (j=0;j<dim[1];j++){ 1037 tempindx = i*dim[1] + j; 1038 tempindx1 = i*NM + j; 1039 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1040 indx2[tempindx]=low+tempindx1; 1041 // printf("Val tempindx1 = %d\n",tempindx1); 1042 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1043 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1044 // printf("-------------------------\n",indx2[tempindx],rank); 1045 } 1046 } 1047 1048 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1049 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1050 1051 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1052 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1054 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1055 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1056 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1057 ierr = PetscFree(indx1);CHKERRQ(ierr); 1058 ierr = PetscFree(indx2);CHKERRQ(ierr); 1059 break; 1060 #endif 1061 case 3: 1062 #if defined(PETSC_USE_COMPLEX) 1063 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1064 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1065 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1066 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1067 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1068 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1069 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1071 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1072 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1073 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1074 break; 1075 #else 1076 1077 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1078 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1079 1080 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1081 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1082 printf("Val local_0_start = %ld\n",local_0_start); 1083 1084 if (dim[2]%2==0) 1085 NM = dim[2]+2; 1086 else 1087 NM = dim[2]+1; 1088 1089 for (i=0;i<local_n0;i++){ 1090 for (j=0;j<dim[1];j++){ 1091 for (k=0;k<dim[2];k++){ 1092 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1093 tempindx1 = i*dim[1]*NM + j*NM + k; 1094 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1095 indx2[tempindx]=low+tempindx1; 1096 } 1097 // printf("Val tempindx1 = %d\n",tempindx1); 1098 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1099 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1100 // printf("-------------------------\n",indx2[tempindx],rank); 1101 } 1102 } 1103 1104 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1105 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1106 1107 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1108 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1109 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1111 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1112 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1113 ierr = PetscFree(indx1);CHKERRQ(ierr); 1114 ierr = PetscFree(indx2);CHKERRQ(ierr); 1115 break; 1116 #endif 1117 default: 1118 #if defined(PETSC_USE_COMPLEX) 1119 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1120 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1121 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1122 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1123 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1124 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1125 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1126 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1127 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1128 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1129 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1130 #else 1131 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1132 printf("The value of temp is %ld\n",temp); 1133 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1134 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1135 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1136 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1137 1138 partial_dim = fftw->partial_dim; 1139 printf("The value of partial dim is %d\n",partial_dim); 1140 1141 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1142 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1143 printf("Val local_0_start = %ld\n",local_0_start); 1144 1145 if (dim[ndim-1]%2==0) 1146 NM = 2; 1147 else 1148 NM = 1; 1149 1150 j = low; 1151 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1152 { 1153 indx1[i] = local_0_start*partial_dim + i; 1154 indx2[i] = j; 1155 //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 1156 if (k%dim[ndim-1]==0) 1157 { j+=NM;} 1158 j++; 1159 } 1160 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1161 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1162 1163 //ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1164 1165 1166 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1167 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1168 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1170 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1171 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1172 ierr = PetscFree(indx1);CHKERRQ(ierr); 1173 ierr = PetscFree(indx2);CHKERRQ(ierr); 1174 1175 break; 1176 #endif 1177 } 1178 } 1179 return 0; 1180 } 1181 EXTERN_C_END 1182 1183 EXTERN_C_BEGIN 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "MatCreate_FFTW" 1186 /* 1187 MatCreate_FFTW - Creates a matrix object that provides FFT 1188 via the external package FFTW 1189 Options Database Keys: 1190 + -mat_fftw_plannerflags - set FFTW planner flags 1191 1192 Level: intermediate 1193 1194 */ 1195 1196 PetscErrorCode MatCreate_FFTW(Mat A) 1197 { 1198 PetscErrorCode ierr; 1199 MPI_Comm comm=((PetscObject)A)->comm; 1200 Mat_FFT *fft=(Mat_FFT*)A->data; 1201 Mat_FFTW *fftw; 1202 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1203 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1204 PetscBool flg; 1205 PetscInt p_flag,partial_dim=1,ctr,N1; 1206 PetscMPIInt size,rank; 1207 ptrdiff_t *pdim, temp; 1208 ptrdiff_t local_n1,local_1_start,local_1_end; 1209 1210 PetscFunctionBegin; 1211 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1212 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1213 ierr = MPI_Barrier(PETSC_COMM_WORLD); 1214 1215 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1216 pdim[0] = dim[0]; 1217 for(ctr=1;ctr<ndim;ctr++) 1218 { 1219 partial_dim *= dim[ctr]; 1220 pdim[ctr] = dim[ctr]; 1221 } 1222 1223 if (size == 1) { 1224 #if defined(PETSC_USE_COMPLEX) 1225 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1226 n = N; 1227 #else 1228 int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1229 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1230 n = tot_dim; 1231 #endif 1232 1233 } else { 1234 ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end; 1235 switch (ndim){ 1236 case 1: 1237 #if !defined(PETSC_USE_COMPLEX) 1238 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1239 #else 1240 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 1241 n = (PetscInt)local_n0; 1242 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1243 #endif 1244 // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 1245 break; 1246 case 2: 1247 #if defined(PETSC_USE_COMPLEX) 1248 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1249 /* 1250 PetscMPIInt rank; 1251 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1252 PetscSynchronizedFlush(comm); 1253 */ 1254 n = (PetscInt)local_n0*dim[1]; 1255 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1256 #else 1257 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1258 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1259 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1260 #endif 1261 break; 1262 case 3: 1263 // printf("The value of alloc local is %d",alloc_local); 1264 #if defined(PETSC_USE_COMPLEX) 1265 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1266 n = (PetscInt)local_n0*dim[1]*dim[2]; 1267 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1268 #else 1269 printf("The code comes here\n"); 1270 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1271 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1272 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1273 #endif 1274 break; 1275 default: 1276 #if defined(PETSC_USE_COMPLEX) 1277 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1278 // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 1279 // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 1280 n = (PetscInt)local_n0*partial_dim; 1281 // printf("New partial dimension is %d %d %d",n,N,ndim); 1282 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1283 #else 1284 temp = pdim[ndim-1]; 1285 pdim[ndim-1]= temp/2 + 1; 1286 printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 1287 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1288 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1289 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1290 pdim[ndim-1] = temp; 1291 printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 1292 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1293 #endif 1294 break; 1295 } 1296 } 1297 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1298 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1299 fft->data = (void*)fftw; 1300 1301 fft->n = n; 1302 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1303 fftw->partial_dim = partial_dim; 1304 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1305 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1306 1307 fftw->p_forward = 0; 1308 fftw->p_backward = 0; 1309 fftw->p_flag = FFTW_ESTIMATE; 1310 1311 if (size == 1){ 1312 A->ops->mult = MatMult_SeqFFTW; 1313 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1314 } else { 1315 A->ops->mult = MatMult_MPIFFTW; 1316 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1317 } 1318 fft->matdestroy = MatDestroy_FFTW; 1319 // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 1320 A->ops->getvecs = MatGetVecs_FFTW; 1321 A->assembled = PETSC_TRUE; 1322 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 1323 PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1324 1325 /* get runtime options */ 1326 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1327 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1328 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1329 PetscOptionsEnd(); 1330 PetscFunctionReturn(0); 1331 } 1332 EXTERN_C_END 1333 1334 1335 1336 1337