1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 28 29 #undef __FUNCT__ 30 #define __FUNCT__ "MatMult_SeqFFTW" 31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32 { 33 PetscErrorCode ierr; 34 Mat_FFT *fft = (Mat_FFT*)A->data; 35 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36 PetscScalar *x_array,*y_array; 37 PetscInt ndim=fft->ndim,*dim=fft->dim; 38 39 PetscFunctionBegin; 40 #if !defined(PETSC_USE_COMPLEX) 41 42 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 43 #endif 44 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 45 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 46 if (!fftw->p_forward){ /* create a plan, then excute it */ 47 switch (ndim){ 48 case 1: 49 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 50 break; 51 case 2: 52 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 53 break; 54 case 3: 55 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 56 break; 57 default: 58 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 59 break; 60 } 61 fftw->finarray = x_array; 62 fftw->foutarray = y_array; 63 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 64 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 65 fftw_execute(fftw->p_forward); 66 } else { /* use existing plan */ 67 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 68 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 69 } else { 70 fftw_execute(fftw->p_forward); 71 } 72 } 73 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 74 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 80 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 81 { 82 PetscErrorCode ierr; 83 Mat_FFT *fft = (Mat_FFT*)A->data; 84 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 85 PetscScalar *x_array,*y_array; 86 PetscInt ndim=fft->ndim,*dim=fft->dim; 87 88 PetscFunctionBegin; 89 #if !defined(PETSC_USE_COMPLEX) 90 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 91 #endif 92 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 93 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 94 if (!fftw->p_backward){ /* create a plan, then excute it */ 95 switch (ndim){ 96 case 1: 97 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 98 break; 99 case 2: 100 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 101 break; 102 case 3: 103 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 104 break; 105 default: 106 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 107 break; 108 } 109 fftw->binarray = x_array; 110 fftw->boutarray = y_array; 111 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 112 } else { /* use existing plan */ 113 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 114 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 115 } else { 116 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 117 } 118 } 119 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 120 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatMult_MPIFFTW" 126 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 127 { 128 PetscErrorCode ierr; 129 Mat_FFT *fft = (Mat_FFT*)A->data; 130 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 131 PetscScalar *x_array,*y_array; 132 PetscInt ndim=fft->ndim,*dim=fft->dim; 133 MPI_Comm comm=((PetscObject)A)->comm; 134 // PetscInt ctr; 135 // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 136 // ndim1=(ptrdiff_t) ndim; 137 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 138 139 // for(ctr=0;ctr<ndim;ctr++) 140 // { 141 // pdim[ctr] = dim[ctr]; 142 // } 143 144 PetscFunctionBegin; 145 //#if !defined(PETSC_USE_COMPLEX) 146 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 147 //#endif 148 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 149 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 150 151 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 152 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 153 if (!fftw->p_forward){ /* create a plan, then excute it */ 154 switch (ndim){ 155 case 1: 156 #if defined(PETSC_USE_COMPLEX) 157 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 158 #endif 159 break; 160 case 2: 161 #if defined(PETSC_USE_COMPLEX) 162 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 163 #else 164 printf("The code comes here \n"); 165 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 166 #endif 167 break; 168 case 3: 169 #if defined(PETSC_USE_COMPLEX) 170 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 171 #else 172 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 173 #endif 174 break; 175 default: 176 #if defined(PETSC_USE_COMPLEX) 177 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 178 #else 179 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 180 #endif 181 // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 182 break; 183 } 184 fftw->finarray = x_array; 185 fftw->foutarray = y_array; 186 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 187 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 188 fftw_execute(fftw->p_forward); 189 } else { /* use existing plan */ 190 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 191 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 192 } else { 193 fftw_execute(fftw->p_forward); 194 } 195 } 196 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 197 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 198 PetscFunctionReturn(0); 199 } 200 201 #undef __FUNCT__ 202 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 203 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 204 { 205 PetscErrorCode ierr; 206 Mat_FFT *fft = (Mat_FFT*)A->data; 207 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 208 PetscScalar *x_array,*y_array; 209 PetscInt ndim=fft->ndim,*dim=fft->dim; 210 MPI_Comm comm=((PetscObject)A)->comm; 211 // PetscInt ctr; 212 // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 213 // ndim1=(ptrdiff_t) ndim; 214 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 215 216 // for(ctr=0;ctr<ndim;ctr++) 217 // { 218 // pdim[ctr] = dim[ctr]; 219 // } 220 221 PetscFunctionBegin; 222 //#if !defined(PETSC_USE_COMPLEX) 223 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 224 //#endif 225 // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 226 // should pdim be a member of Mat_FFTW? 227 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 228 229 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 230 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 231 if (!fftw->p_backward){ /* create a plan, then excute it */ 232 switch (ndim){ 233 case 1: 234 #if defined(PETSC_USE_COMPLEX) 235 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 236 #endif 237 break; 238 case 2: 239 #if defined(PETSC_USE_COMPLEX) 240 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 241 #else 242 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 243 #endif 244 break; 245 case 3: 246 #if defined(PETSC_USE_COMPLEX) 247 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 248 #else 249 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 250 #endif 251 break; 252 default: 253 #if defined(PETSC_USE_COMPLEX) 254 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 255 #else 256 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 257 #endif 258 // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 259 break; 260 } 261 fftw->binarray = x_array; 262 fftw->boutarray = y_array; 263 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264 } else { /* use existing plan */ 265 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 266 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 267 } else { 268 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 269 } 270 } 271 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 272 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatDestroy_FFTW" 278 PetscErrorCode MatDestroy_FFTW(Mat A) 279 { 280 Mat_FFT *fft = (Mat_FFT*)A->data; 281 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 282 PetscErrorCode ierr; 283 284 PetscFunctionBegin; 285 #if !defined(PETSC_USE_COMPLEX) 286 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 287 #endif 288 fftw_destroy_plan(fftw->p_forward); 289 fftw_destroy_plan(fftw->p_backward); 290 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 291 ierr = PetscFree(fft->data);CHKERRQ(ierr); 292 PetscFunctionReturn(0); 293 } 294 295 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 296 #undef __FUNCT__ 297 #define __FUNCT__ "VecDestroy_MPIFFTW" 298 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 299 { 300 PetscErrorCode ierr; 301 PetscScalar *array; 302 303 PetscFunctionBegin; 304 #if !defined(PETSC_USE_COMPLEX) 305 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 306 #endif 307 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 308 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 309 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 310 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatGetVecs1DC_FFTW" 316 /* 317 MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 318 parallel layout determined by FFTW-1D 319 320 */ 321 PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 322 { 323 PetscErrorCode ierr; 324 PetscMPIInt size,rank; 325 MPI_Comm comm=((PetscObject)A)->comm; 326 Mat_FFT *fft = (Mat_FFT*)A->data; 327 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 328 PetscInt N=fft->N; 329 PetscInt ndim=fft->ndim,*dim=fft->dim; 330 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 331 ptrdiff_t f_local_n1,f_local_1_end; 332 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 333 ptrdiff_t b_local_n1,b_local_1_end; 334 fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 335 336 PetscFunctionBegin; 337 #if !defined(PETSC_USE_COMPLEX) 338 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 339 #endif 340 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 341 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 342 if (size == 1){ 343 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 344 } 345 else { 346 if (ndim>1){ 347 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 348 else { 349 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 350 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 351 if (fin) { 352 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 353 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 354 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 355 } 356 if (fout) { 357 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 358 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 359 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 360 } 361 if (bin) { 362 data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 363 ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 364 (*bin)->ops->destroy = VecDestroy_MPIFFTW; 365 } 366 if (bout) { 367 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 368 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 369 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 370 } 371 } 372 if (fin){ 373 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 374 } 375 if (fout){ 376 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 377 } 378 if (bin){ 379 ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 380 } 381 if (bout){ 382 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 383 } 384 PetscFunctionReturn(0); 385 } 386 387 388 } 389 390 #undef __FUNCT__ 391 #define __FUNCT__ "MatGetVecs_FFTW" 392 /* 393 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 394 parallel layout determined by FFTW 395 396 Collective on Mat 397 398 Input Parameter: 399 . mat - the matrix 400 401 Output Parameter: 402 + fin - (optional) input vector of forward FFTW 403 - fout - (optional) output vector of forward FFTW 404 405 Level: advanced 406 407 .seealso: MatCreateFFTW() 408 */ 409 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 410 { 411 PetscErrorCode ierr; 412 PetscMPIInt size,rank; 413 MPI_Comm comm=((PetscObject)A)->comm; 414 Mat_FFT *fft = (Mat_FFT*)A->data; 415 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 416 PetscInt N=fft->N, N1, n1,vsize; 417 418 PetscFunctionBegin; 419 //#if !defined(PETSC_USE_COMPLEX) 420 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 421 //#endif 422 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 423 PetscValidType(A,1); 424 425 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 426 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 427 if (size == 1){ /* sequential case */ 428 #if defined(PETSC_USE_COMPLEX) 429 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 430 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 431 #else 432 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 433 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*(N/2+1),fout);CHKERRQ(ierr);} 434 #endif 435 printf("The code successfully comes at the end of the routine with one processor\n"); 436 } else { /* mpi case */ 437 ptrdiff_t alloc_local,local_n0,local_0_start; 438 ptrdiff_t local_n1,local_1_end; 439 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 440 fftw_complex *data_fin,*data_fout; 441 double *data_finr ; 442 ptrdiff_t local_1_start,temp; 443 // PetscInt ctr; 444 // ptrdiff_t ndim1,*pdim; 445 // ndim1=(ptrdiff_t) ndim; 446 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 447 448 // for(ctr=0;ctr<ndim;ctr++) 449 // {k 450 // pdim[ctr] = dim[ctr]; 451 // } 452 453 454 455 switch (ndim){ 456 case 1: 457 /* Get local size */ 458 /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 459 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 460 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 461 if (fin) { 462 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 463 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 464 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 465 } 466 if (fout) { 467 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 468 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 469 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 470 } 471 break; 472 case 2: 473 #if !defined(PETSC_USE_COMPLEX) 474 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 475 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 476 if (fin) { 477 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 478 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 479 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 480 //printf("The code comes here with vector size %d\n",vsize); 481 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 482 } 483 if (fout) { 484 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 485 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 486 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 487 } 488 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 489 490 #else 491 /* Get local size */ 492 printf("Hope this does not come here"); 493 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 494 if (fin) { 495 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 496 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 497 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 498 } 499 if (fout) { 500 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 501 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 502 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 503 } 504 printf("Hope this does not come here"); 505 #endif 506 break; 507 case 3: 508 /* Get local size */ 509 #if !defined(PETSC_USE_COMPLEX) 510 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 511 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 512 if (fin) { 513 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 514 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 515 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 516 //printf("The code comes here with vector size %d\n",vsize); 517 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 518 } 519 if (fout) { 520 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 521 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 522 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 523 } 524 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 525 526 527 #else 528 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 529 // printf("The quantity n is %d",n); 530 if (fin) { 531 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 532 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 533 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 534 } 535 if (fout) { 536 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 537 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 538 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 539 } 540 #endif 541 break; 542 default: 543 /* Get local size */ 544 #if !defined(PETSC_USE_COMPLEX) 545 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 546 printf("The value of temp is %ld\n",temp); 547 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 548 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 549 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 550 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 551 if (fin) { 552 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 553 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 554 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 555 //printf("The code comes here with vector size %d\n",vsize); 556 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 557 } 558 if (fout) { 559 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 560 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 561 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 562 } 563 564 #else 565 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 566 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 567 // printf("The value of alloc local is %d",alloc_local); 568 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 569 // for(i=0;i<ndim;i++) 570 // { 571 // pdim[i]=dim[i];printf("%d",pdim[i]); 572 // } 573 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 574 // printf("The quantity n is %d",n); 575 if (fin) { 576 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 577 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 578 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 579 } 580 if (fout) { 581 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 582 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 583 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 584 } 585 #endif 586 break; 587 } 588 } 589 // if (fin){ 590 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 591 // } 592 // if (fout){ 593 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 594 // } 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "InputTransformFFT" 600 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 601 { 602 PetscErrorCode ierr; 603 PetscFunctionBegin; 604 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 /* 609 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 610 Input A, x, y 611 A - FFTW matrix 612 x - user data 613 Options Database Keys: 614 + -mat_fftw_plannerflags - set FFTW planner flags 615 616 Level: intermediate 617 618 */ 619 620 EXTERN_C_BEGIN 621 #undef __FUNCT__ 622 #define __FUNCT__ "InputTransformFFT_FTTW" 623 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 624 { 625 PetscErrorCode ierr; 626 MPI_Comm comm=((PetscObject)A)->comm; 627 Mat_FFT *fft = (Mat_FFT*)A->data; 628 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 629 PetscInt N=fft->N, N1, n1 ,NM; 630 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 631 PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 632 PetscInt i,j,k,rank,size,partial_dim; 633 ptrdiff_t alloc_local,local_n0,local_0_start; 634 ptrdiff_t local_n1,local_1_start,temp; 635 VecScatter vecscat; 636 IS list1,list2; 637 638 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 639 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 640 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 641 printf("Local ownership starts at %d\n",low); 642 643 switch (ndim){ 644 case 1: 645 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 646 break; 647 case 2: 648 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 649 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 650 651 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 652 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 653 printf("Val local_0_start = %ld\n",local_0_start); 654 655 if (dim[1]%2==0) 656 NM = dim[1]+2; 657 else 658 NM = dim[1]+1; 659 660 for (i=0;i<local_n0;i++){ 661 for (j=0;j<dim[1];j++){ 662 tempindx = i*dim[1] + j; 663 tempindx1 = i*NM + j; 664 indx1[tempindx]=local_0_start*dim[1]+tempindx; 665 indx2[tempindx]=low+tempindx1; 666 // printf("Val tempindx1 = %d\n",tempindx1); 667 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 668 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 669 // printf("-------------------------\n",indx2[tempindx],rank); 670 } 671 } 672 673 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 674 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 675 676 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 677 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 678 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 679 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 680 break; 681 682 case 3: 683 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 684 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 685 686 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 687 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 688 printf("Val local_0_start = %ld\n",local_0_start); 689 690 if (dim[2]%2==0) 691 NM = dim[2]+2; 692 else 693 NM = dim[2]+1; 694 695 for (i=0;i<local_n0;i++){ 696 for (j=0;j<dim[1];j++){ 697 for (k=0;k<dim[2];k++){ 698 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 699 tempindx1 = i*dim[1]*NM + j*NM + k; 700 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 701 indx2[tempindx]=low+tempindx1; 702 } 703 // printf("Val tempindx1 = %d\n",tempindx1); 704 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 705 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 706 // printf("-------------------------\n",indx2[tempindx],rank); 707 } 708 } 709 710 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 711 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 712 713 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 714 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 715 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 716 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 717 break; 718 719 default: 720 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 721 printf("The value of temp is %ld\n",temp); 722 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 723 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 724 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 725 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 726 727 partial_dim = fftw->partial_dim; 728 printf("The value of partial dim is %d\n",partial_dim); 729 730 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 731 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 732 printf("Val local_0_start = %ld\n",local_0_start); 733 734 if (dim[ndim-1]%2==0) 735 NM = dim[ndim-1]+2; 736 else 737 NM = dim[ndim-1]+1; 738 739 740 741 break; 742 } 743 744 return 0; 745 } 746 EXTERN_C_END 747 748 #undef __FUNCT__ 749 #define __FUNCT__ "OutputTransformFFT" 750 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 751 { 752 PetscErrorCode ierr; 753 PetscFunctionBegin; 754 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 /* 759 OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 760 Input A, x, y 761 A - FFTW matrix 762 x - FFTW vector 763 y - PETSc vector that user can read 764 Options Database Keys: 765 + -mat_fftw_plannerflags - set FFTW planner flags 766 767 Level: intermediate 768 769 */ 770 771 EXTERN_C_BEGIN 772 #undef __FUNCT__ 773 #define __FUNCT__ "OutputTransformFFT_FTTW" 774 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 775 { 776 PetscErrorCode ierr; 777 MPI_Comm comm=((PetscObject)A)->comm; 778 Mat_FFT *fft = (Mat_FFT*)A->data; 779 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 780 PetscInt N=fft->N, N1, n1 ,NM; 781 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 782 PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 783 PetscInt i,j,k,rank,size; 784 ptrdiff_t alloc_local,local_n0,local_0_start; 785 ptrdiff_t local_n1,local_1_start; 786 VecScatter vecscat; 787 IS list1,list2; 788 789 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 790 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 791 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 792 printf("Local ownership starts at %d\n",low); 793 794 switch (ndim){ 795 case 1: 796 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 797 break; 798 case 2: 799 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 800 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 801 802 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 803 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 804 printf("Val local_0_start = %ld\n",local_0_start); 805 806 if (dim[1]%2==0) 807 NM = dim[1]+2; 808 else 809 NM = dim[1]+1; 810 811 812 813 for (i=0;i<local_n0;i++){ 814 for (j=0;j<dim[1];j++){ 815 tempindx = i*dim[1] + j; 816 tempindx1 = i*NM + j; 817 indx1[tempindx]=local_0_start*dim[1]+tempindx; 818 indx2[tempindx]=low+tempindx1; 819 // printf("Val tempindx1 = %d\n",tempindx1); 820 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 821 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 822 // printf("-------------------------\n",indx2[tempindx],rank); 823 } 824 } 825 826 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 827 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 828 829 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 830 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 831 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 832 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 833 break; 834 835 case 3: 836 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 837 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 838 839 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 840 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 841 printf("Val local_0_start = %ld\n",local_0_start); 842 843 if (dim[2]%2==0) 844 NM = dim[2]+2; 845 else 846 NM = dim[2]+1; 847 848 for (i=0;i<local_n0;i++){ 849 for (j=0;j<dim[1];j++){ 850 for (k=0;k<dim[2];k++){ 851 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 852 tempindx1 = i*dim[1]*NM + j*NM + k; 853 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 854 indx2[tempindx]=low+tempindx1; 855 } 856 // printf("Val tempindx1 = %d\n",tempindx1); 857 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 858 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 859 // printf("-------------------------\n",indx2[tempindx],rank); 860 } 861 } 862 863 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 864 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 865 866 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 867 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 870 break; 871 872 default: 873 SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 874 break; 875 } 876 return 0; 877 } 878 EXTERN_C_END 879 880 EXTERN_C_BEGIN 881 #undef __FUNCT__ 882 #define __FUNCT__ "MatCreate_FFTW" 883 /* 884 MatCreate_FFTW - Creates a matrix object that provides FFT 885 via the external package FFTW 886 Options Database Keys: 887 + -mat_fftw_plannerflags - set FFTW planner flags 888 889 Level: intermediate 890 891 */ 892 893 PetscErrorCode MatCreate_FFTW(Mat A) 894 { 895 PetscErrorCode ierr; 896 MPI_Comm comm=((PetscObject)A)->comm; 897 Mat_FFT *fft=(Mat_FFT*)A->data; 898 Mat_FFTW *fftw; 899 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 900 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 901 PetscBool flg; 902 PetscInt p_flag,partial_dim=1,ctr,N1; 903 PetscMPIInt size,rank; 904 ptrdiff_t *pdim, temp; 905 ptrdiff_t local_n1,local_1_start; 906 907 PetscFunctionBegin; 908 //#if !defined(PETSC_USE_COMPLEX) 909 // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 910 //#endif 911 912 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 913 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 914 ierr = MPI_Barrier(PETSC_COMM_WORLD); 915 916 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 917 pdim[0] = dim[0]; 918 for(ctr=1;ctr<ndim;ctr++) 919 { 920 partial_dim *= dim[ctr]; 921 pdim[ctr] = dim[ctr]; 922 } 923 //#if !defined(PETSC_USE_COMPLEX) 924 // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 925 //#endif 926 927 // printf("partial dimension is %d",partial_dim); 928 if (size == 1) { 929 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 930 n = N; 931 } else { 932 ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 933 switch (ndim){ 934 case 1: 935 #if !defined(PETSC_USE_COMPLEX) 936 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 937 #else 938 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 939 n = (PetscInt)local_n0; 940 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 941 #endif 942 // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 943 break; 944 case 2: 945 #if defined(PETSC_USE_COMPLEX) 946 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 947 /* 948 PetscMPIInt rank; 949 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 950 PetscSynchronizedFlush(comm); 951 */ 952 n = (PetscInt)local_n0*dim[1]; 953 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 954 #else 955 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 956 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 957 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 958 #endif 959 break; 960 case 3: 961 // printf("The value of alloc local is %d",alloc_local); 962 #if defined(PETSC_USE_COMPLEX) 963 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 964 n = (PetscInt)local_n0*dim[1]*dim[2]; 965 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 966 #else 967 printf("The code comes here\n"); 968 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 969 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 970 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 971 #endif 972 break; 973 default: 974 #if defined(PETSC_USE_COMPLEX) 975 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 976 // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 977 // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 978 n = (PetscInt)local_n0*partial_dim; 979 // printf("New partial dimension is %d %d %d",n,N,ndim); 980 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 981 #else 982 temp = pdim[ndim-1]; 983 pdim[ndim-1]= temp/2 + 1; 984 printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 985 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 986 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 987 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 988 pdim[ndim-1] = temp; 989 printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 990 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 991 #endif 992 break; 993 } 994 } 995 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 996 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 997 fft->data = (void*)fftw; 998 999 fft->n = n; 1000 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1001 fftw->partial_dim = partial_dim; 1002 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1003 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1004 1005 fftw->p_forward = 0; 1006 fftw->p_backward = 0; 1007 fftw->p_flag = FFTW_ESTIMATE; 1008 1009 if (size == 1){ 1010 A->ops->mult = MatMult_SeqFFTW; 1011 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1012 } else { 1013 A->ops->mult = MatMult_MPIFFTW; 1014 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1015 } 1016 fft->matdestroy = MatDestroy_FFTW; 1017 A->ops->getvecs = MatGetVecs_FFTW; 1018 A->assembled = PETSC_TRUE; 1019 #if !defined(PETSC_USE_COMPLEX) 1020 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 1021 PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1022 #endif 1023 1024 /* get runtime options */ 1025 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1026 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1027 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1028 PetscOptionsEnd(); 1029 PetscFunctionReturn(0); 1030 } 1031 EXTERN_C_END 1032 1033 1034 1035 1036