1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 28 29 #undef __FUNCT__ 30 #define __FUNCT__ "MatMult_SeqFFTW" 31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32 { 33 PetscErrorCode ierr; 34 Mat_FFT *fft = (Mat_FFT*)A->data; 35 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36 PetscScalar *x_array,*y_array; 37 PetscInt ndim=fft->ndim,*dim=fft->dim; 38 39 PetscFunctionBegin; 40 //#if !defined(PETSC_USE_COMPLEX) 41 42 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 43 //#endif 44 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 45 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 46 if (!fftw->p_forward){ /* create a plan, then excute it */ 47 switch (ndim){ 48 case 1: 49 #if defined(PETSC_USE_COMPLEX) 50 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 51 #else 52 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 53 #endif 54 break; 55 case 2: 56 #if defined(PETSC_USE_COMPLEX) 57 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 58 #else 59 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 60 #endif 61 break; 62 case 3: 63 #if defined(PETSC_USE_COMPLEX) 64 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 65 #else 66 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 67 #endif 68 break; 69 default: 70 #if defined(PETSC_USE_COMPLEX) 71 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 72 #else 73 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 74 #endif 75 break; 76 } 77 fftw->finarray = x_array; 78 fftw->foutarray = y_array; 79 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 80 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 81 fftw_execute(fftw->p_forward); 82 } else { /* use existing plan */ 83 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 84 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 85 } else { 86 fftw_execute(fftw->p_forward); 87 } 88 } 89 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 90 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 96 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 97 { 98 PetscErrorCode ierr; 99 Mat_FFT *fft = (Mat_FFT*)A->data; 100 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 101 PetscScalar *x_array,*y_array; 102 PetscInt ndim=fft->ndim,*dim=fft->dim; 103 104 PetscFunctionBegin; 105 #if !defined(PETSC_USE_COMPLEX) 106 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 107 #endif 108 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 109 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 110 if (!fftw->p_backward){ /* create a plan, then excute it */ 111 switch (ndim){ 112 case 1: 113 #if defined(PETSC_USE_COMPLEX) 114 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 115 #else 116 fftw->p_forward = fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 117 #endif 118 break; 119 case 2: 120 #if defined(PETSC_USE_COMPLEX) 121 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 122 #else 123 fftw->p_forward = fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 124 #endif 125 break; 126 case 3: 127 #if defined(PETSC_USE_COMPLEX) 128 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 129 #else 130 fftw->p_forward = fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 131 #endif 132 break; 133 default: 134 #if defined(PETSC_USE_COMPLEX) 135 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 136 #else 137 fftw->p_forward = fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 138 #endif 139 break; 140 } 141 fftw->binarray = x_array; 142 fftw->boutarray = y_array; 143 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 144 } else { /* use existing plan */ 145 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 146 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 147 } else { 148 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 149 } 150 } 151 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 152 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 153 PetscFunctionReturn(0); 154 } 155 156 #undef __FUNCT__ 157 #define __FUNCT__ "MatMult_MPIFFTW" 158 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 159 { 160 PetscErrorCode ierr; 161 Mat_FFT *fft = (Mat_FFT*)A->data; 162 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 163 PetscScalar *x_array,*y_array; 164 PetscInt ndim=fft->ndim,*dim=fft->dim; 165 MPI_Comm comm=((PetscObject)A)->comm; 166 // PetscInt ctr; 167 // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 168 // ndim1=(ptrdiff_t) ndim; 169 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 170 171 // for(ctr=0;ctr<ndim;ctr++) 172 // { 173 // pdim[ctr] = dim[ctr]; 174 // } 175 176 PetscFunctionBegin; 177 //#if !defined(PETSC_USE_COMPLEX) 178 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 179 //#endif 180 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 181 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 182 183 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 184 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 185 if (!fftw->p_forward){ /* create a plan, then excute it */ 186 switch (ndim){ 187 case 1: 188 #if defined(PETSC_USE_COMPLEX) 189 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 190 #endif 191 break; 192 case 2: 193 #if defined(PETSC_USE_COMPLEX) 194 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 195 #else 196 printf("The code comes here \n"); 197 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 198 #endif 199 break; 200 case 3: 201 #if defined(PETSC_USE_COMPLEX) 202 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 203 #else 204 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 205 #endif 206 break; 207 default: 208 #if defined(PETSC_USE_COMPLEX) 209 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 210 #else 211 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 212 #endif 213 // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 214 break; 215 } 216 fftw->finarray = x_array; 217 fftw->foutarray = y_array; 218 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 219 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 220 fftw_execute(fftw->p_forward); 221 } else { /* use existing plan */ 222 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 223 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 224 } else { 225 fftw_execute(fftw->p_forward); 226 } 227 } 228 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 229 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 230 PetscFunctionReturn(0); 231 } 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 235 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 236 { 237 PetscErrorCode ierr; 238 Mat_FFT *fft = (Mat_FFT*)A->data; 239 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 240 PetscScalar *x_array,*y_array; 241 PetscInt ndim=fft->ndim,*dim=fft->dim; 242 MPI_Comm comm=((PetscObject)A)->comm; 243 // PetscInt ctr; 244 // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 245 // ndim1=(ptrdiff_t) ndim; 246 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 247 248 // for(ctr=0;ctr<ndim;ctr++) 249 // { 250 // pdim[ctr] = dim[ctr]; 251 // } 252 253 PetscFunctionBegin; 254 //#if !defined(PETSC_USE_COMPLEX) 255 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 256 //#endif 257 // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 258 // should pdim be a member of Mat_FFTW? 259 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 260 261 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 262 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 263 if (!fftw->p_backward){ /* create a plan, then excute it */ 264 switch (ndim){ 265 case 1: 266 #if defined(PETSC_USE_COMPLEX) 267 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 268 #endif 269 break; 270 case 2: 271 #if defined(PETSC_USE_COMPLEX) 272 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 273 #else 274 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 275 #endif 276 break; 277 case 3: 278 #if defined(PETSC_USE_COMPLEX) 279 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 280 #else 281 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 282 #endif 283 break; 284 default: 285 #if defined(PETSC_USE_COMPLEX) 286 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 287 #else 288 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 289 #endif 290 // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 291 break; 292 } 293 fftw->binarray = x_array; 294 fftw->boutarray = y_array; 295 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 296 } else { /* use existing plan */ 297 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 298 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 299 } else { 300 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 301 } 302 } 303 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 304 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatDestroy_FFTW" 310 PetscErrorCode MatDestroy_FFTW(Mat A) 311 { 312 Mat_FFT *fft = (Mat_FFT*)A->data; 313 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 314 PetscErrorCode ierr; 315 316 PetscFunctionBegin; 317 #if !defined(PETSC_USE_COMPLEX) 318 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 319 #endif 320 fftw_destroy_plan(fftw->p_forward); 321 fftw_destroy_plan(fftw->p_backward); 322 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 323 ierr = PetscFree(fft->data);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 328 #undef __FUNCT__ 329 #define __FUNCT__ "VecDestroy_MPIFFTW" 330 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 331 { 332 PetscErrorCode ierr; 333 PetscScalar *array; 334 335 PetscFunctionBegin; 336 #if !defined(PETSC_USE_COMPLEX) 337 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 338 #endif 339 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 340 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 341 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 342 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "MatGetVecs1DC_FFTW" 348 /* 349 MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 350 parallel layout determined by FFTW-1D 351 352 */ 353 PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 354 { 355 PetscErrorCode ierr; 356 PetscMPIInt size,rank; 357 MPI_Comm comm=((PetscObject)A)->comm; 358 Mat_FFT *fft = (Mat_FFT*)A->data; 359 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 360 PetscInt N=fft->N; 361 PetscInt ndim=fft->ndim,*dim=fft->dim; 362 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 363 ptrdiff_t f_local_n1,f_local_1_end; 364 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 365 ptrdiff_t b_local_n1,b_local_1_end; 366 fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 367 368 PetscFunctionBegin; 369 #if !defined(PETSC_USE_COMPLEX) 370 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 371 #endif 372 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 373 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 374 if (size == 1){ 375 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 376 } 377 else { 378 if (ndim>1){ 379 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 380 else { 381 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 382 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 383 if (fin) { 384 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 385 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 386 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 387 } 388 if (fout) { 389 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 390 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 391 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 392 } 393 if (bin) { 394 data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 395 ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 396 (*bin)->ops->destroy = VecDestroy_MPIFFTW; 397 } 398 if (bout) { 399 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 400 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 401 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 402 } 403 } 404 if (fin){ 405 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 406 } 407 if (fout){ 408 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 409 } 410 if (bin){ 411 ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 412 } 413 if (bout){ 414 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 415 } 416 PetscFunctionReturn(0); 417 } 418 419 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatGetVecs_FFTW" 424 /* 425 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 426 parallel layout determined by FFTW 427 428 Collective on Mat 429 430 Input Parameter: 431 . mat - the matrix 432 433 Output Parameter: 434 + fin - (optional) input vector of forward FFTW 435 - fout - (optional) output vector of forward FFTW 436 437 Level: advanced 438 439 .seealso: MatCreateFFTW() 440 */ 441 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 442 { 443 PetscErrorCode ierr; 444 PetscMPIInt size,rank; 445 MPI_Comm comm=((PetscObject)A)->comm; 446 Mat_FFT *fft = (Mat_FFT*)A->data; 447 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 448 PetscInt N=fft->N, N1, n1,vsize; 449 450 PetscFunctionBegin; 451 //#if !defined(PETSC_USE_COMPLEX) 452 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 453 //#endif 454 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 455 PetscValidType(A,1); 456 457 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 458 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 459 if (size == 1){ /* sequential case */ 460 #if defined(PETSC_USE_COMPLEX) 461 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 462 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 463 #else 464 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 465 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*(N/2+1),fout);CHKERRQ(ierr);} 466 #endif 467 printf("The code successfully comes at the end of the routine with one processor\n"); 468 } else { /* mpi case */ 469 ptrdiff_t alloc_local,local_n0,local_0_start; 470 ptrdiff_t local_n1,local_1_end; 471 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 472 fftw_complex *data_fin,*data_fout; 473 double *data_finr ; 474 ptrdiff_t local_1_start,temp; 475 // PetscInt ctr; 476 // ptrdiff_t ndim1,*pdim; 477 // ndim1=(ptrdiff_t) ndim; 478 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 479 480 // for(ctr=0;ctr<ndim;ctr++) 481 // {k 482 // pdim[ctr] = dim[ctr]; 483 // } 484 485 486 487 switch (ndim){ 488 case 1: 489 /* Get local size */ 490 /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 491 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 492 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 493 if (fin) { 494 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 495 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 496 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 497 } 498 if (fout) { 499 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 500 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 501 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 502 } 503 break; 504 case 2: 505 #if !defined(PETSC_USE_COMPLEX) 506 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 507 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 508 if (fin) { 509 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 510 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 511 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 512 //printf("The code comes here with vector size %d\n",vsize); 513 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 514 } 515 if (fout) { 516 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 517 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 518 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 519 } 520 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 521 522 #else 523 /* Get local size */ 524 printf("Hope this does not come here"); 525 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 526 if (fin) { 527 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 528 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 529 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 530 } 531 if (fout) { 532 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 533 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 534 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 535 } 536 printf("Hope this does not come here"); 537 #endif 538 break; 539 case 3: 540 /* Get local size */ 541 #if !defined(PETSC_USE_COMPLEX) 542 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 543 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 544 if (fin) { 545 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 546 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 547 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 548 //printf("The code comes here with vector size %d\n",vsize); 549 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 550 } 551 if (fout) { 552 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 553 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 554 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 555 } 556 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 557 558 559 #else 560 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 561 // printf("The quantity n is %d",n); 562 if (fin) { 563 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 564 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 565 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 566 } 567 if (fout) { 568 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 569 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 570 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 571 } 572 #endif 573 break; 574 default: 575 /* Get local size */ 576 #if !defined(PETSC_USE_COMPLEX) 577 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 578 printf("The value of temp is %ld\n",temp); 579 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 580 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 581 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 582 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 583 if (fin) { 584 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 585 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 586 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 587 //printf("The code comes here with vector size %d\n",vsize); 588 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 589 } 590 if (fout) { 591 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 592 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 593 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 594 } 595 596 #else 597 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 598 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 599 // printf("The value of alloc local is %d",alloc_local); 600 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 601 // for(i=0;i<ndim;i++) 602 // { 603 // pdim[i]=dim[i];printf("%d",pdim[i]); 604 // } 605 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 606 // printf("The quantity n is %d",n); 607 if (fin) { 608 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 609 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 610 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 611 } 612 if (fout) { 613 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 614 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 615 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 616 } 617 #endif 618 break; 619 } 620 } 621 // if (fin){ 622 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 623 // } 624 // if (fout){ 625 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 626 // } 627 PetscFunctionReturn(0); 628 } 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "InputTransformFFT" 632 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 633 { 634 PetscErrorCode ierr; 635 PetscFunctionBegin; 636 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } 639 640 /* 641 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 642 Input A, x, y 643 A - FFTW matrix 644 x - user data 645 Options Database Keys: 646 + -mat_fftw_plannerflags - set FFTW planner flags 647 648 Level: intermediate 649 650 */ 651 652 EXTERN_C_BEGIN 653 #undef __FUNCT__ 654 #define __FUNCT__ "InputTransformFFT_FTTW" 655 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 656 { 657 PetscErrorCode ierr; 658 MPI_Comm comm=((PetscObject)A)->comm; 659 Mat_FFT *fft = (Mat_FFT*)A->data; 660 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 661 PetscInt N=fft->N, N1, n1 ,NM; 662 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 663 PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 664 PetscInt i,j,k,rank,size,partial_dim; 665 ptrdiff_t alloc_local,local_n0,local_0_start; 666 ptrdiff_t local_n1,local_1_start,temp; 667 VecScatter vecscat; 668 IS list1,list2; 669 670 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 671 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 672 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 673 printf("Local ownership starts at %d\n",low); 674 675 switch (ndim){ 676 case 1: 677 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 678 break; 679 case 2: 680 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 681 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 682 683 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 684 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 685 printf("Val local_0_start = %ld\n",local_0_start); 686 687 if (dim[1]%2==0) 688 NM = dim[1]+2; 689 else 690 NM = dim[1]+1; 691 692 for (i=0;i<local_n0;i++){ 693 for (j=0;j<dim[1];j++){ 694 tempindx = i*dim[1] + j; 695 tempindx1 = i*NM + j; 696 indx1[tempindx]=local_0_start*dim[1]+tempindx; 697 indx2[tempindx]=low+tempindx1; 698 // printf("Val tempindx1 = %d\n",tempindx1); 699 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 700 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 701 // printf("-------------------------\n",indx2[tempindx],rank); 702 } 703 } 704 705 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 706 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 707 708 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 709 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 710 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 711 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 712 break; 713 714 case 3: 715 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 716 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 717 718 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 719 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 720 printf("Val local_0_start = %ld\n",local_0_start); 721 722 if (dim[2]%2==0) 723 NM = dim[2]+2; 724 else 725 NM = dim[2]+1; 726 727 for (i=0;i<local_n0;i++){ 728 for (j=0;j<dim[1];j++){ 729 for (k=0;k<dim[2];k++){ 730 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 731 tempindx1 = i*dim[1]*NM + j*NM + k; 732 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 733 indx2[tempindx]=low+tempindx1; 734 } 735 // printf("Val tempindx1 = %d\n",tempindx1); 736 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 737 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 738 // printf("-------------------------\n",indx2[tempindx],rank); 739 } 740 } 741 742 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 743 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 744 745 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 746 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 747 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 748 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 749 break; 750 751 default: 752 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 753 printf("The value of temp is %ld\n",temp); 754 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 755 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 756 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 757 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 758 759 partial_dim = fftw->partial_dim; 760 printf("The value of partial dim is %d\n",partial_dim); 761 762 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 763 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 764 printf("Val local_0_start = %ld\n",local_0_start); 765 766 if (dim[ndim-1]%2==0) 767 NM = dim[ndim-1]+2; 768 else 769 NM = dim[ndim-1]+1; 770 771 772 773 break; 774 } 775 776 return 0; 777 } 778 EXTERN_C_END 779 780 #undef __FUNCT__ 781 #define __FUNCT__ "OutputTransformFFT" 782 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 783 { 784 PetscErrorCode ierr; 785 PetscFunctionBegin; 786 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 787 PetscFunctionReturn(0); 788 } 789 790 /* 791 OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 792 Input A, x, y 793 A - FFTW matrix 794 x - FFTW vector 795 y - PETSc vector that user can read 796 Options Database Keys: 797 + -mat_fftw_plannerflags - set FFTW planner flags 798 799 Level: intermediate 800 801 */ 802 803 EXTERN_C_BEGIN 804 #undef __FUNCT__ 805 #define __FUNCT__ "OutputTransformFFT_FTTW" 806 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 807 { 808 PetscErrorCode ierr; 809 MPI_Comm comm=((PetscObject)A)->comm; 810 Mat_FFT *fft = (Mat_FFT*)A->data; 811 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 812 PetscInt N=fft->N, N1, n1 ,NM; 813 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 814 PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 815 PetscInt i,j,k,rank,size; 816 ptrdiff_t alloc_local,local_n0,local_0_start; 817 ptrdiff_t local_n1,local_1_start; 818 VecScatter vecscat; 819 IS list1,list2; 820 821 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 822 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 823 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 824 printf("Local ownership starts at %d\n",low); 825 826 switch (ndim){ 827 case 1: 828 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 829 break; 830 case 2: 831 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 832 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 833 834 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 835 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 836 printf("Val local_0_start = %ld\n",local_0_start); 837 838 if (dim[1]%2==0) 839 NM = dim[1]+2; 840 else 841 NM = dim[1]+1; 842 843 844 845 for (i=0;i<local_n0;i++){ 846 for (j=0;j<dim[1];j++){ 847 tempindx = i*dim[1] + j; 848 tempindx1 = i*NM + j; 849 indx1[tempindx]=local_0_start*dim[1]+tempindx; 850 indx2[tempindx]=low+tempindx1; 851 // printf("Val tempindx1 = %d\n",tempindx1); 852 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 853 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 854 // printf("-------------------------\n",indx2[tempindx],rank); 855 } 856 } 857 858 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 859 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 860 861 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 862 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 864 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 865 break; 866 867 case 3: 868 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 869 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 870 871 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 872 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 873 printf("Val local_0_start = %ld\n",local_0_start); 874 875 if (dim[2]%2==0) 876 NM = dim[2]+2; 877 else 878 NM = dim[2]+1; 879 880 for (i=0;i<local_n0;i++){ 881 for (j=0;j<dim[1];j++){ 882 for (k=0;k<dim[2];k++){ 883 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 884 tempindx1 = i*dim[1]*NM + j*NM + k; 885 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 886 indx2[tempindx]=low+tempindx1; 887 } 888 // printf("Val tempindx1 = %d\n",tempindx1); 889 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 890 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 891 // printf("-------------------------\n",indx2[tempindx],rank); 892 } 893 } 894 895 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 896 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 897 898 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 899 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 902 break; 903 904 default: 905 SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 906 break; 907 } 908 return 0; 909 } 910 EXTERN_C_END 911 912 EXTERN_C_BEGIN 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatCreate_FFTW" 915 /* 916 MatCreate_FFTW - Creates a matrix object that provides FFT 917 via the external package FFTW 918 Options Database Keys: 919 + -mat_fftw_plannerflags - set FFTW planner flags 920 921 Level: intermediate 922 923 */ 924 925 PetscErrorCode MatCreate_FFTW(Mat A) 926 { 927 PetscErrorCode ierr; 928 MPI_Comm comm=((PetscObject)A)->comm; 929 Mat_FFT *fft=(Mat_FFT*)A->data; 930 Mat_FFTW *fftw; 931 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 932 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 933 PetscBool flg; 934 PetscInt p_flag,partial_dim=1,ctr,N1; 935 PetscMPIInt size,rank; 936 ptrdiff_t *pdim, temp; 937 ptrdiff_t local_n1,local_1_start; 938 939 PetscFunctionBegin; 940 //#if !defined(PETSC_USE_COMPLEX) 941 // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 942 //#endif 943 944 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 945 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 946 ierr = MPI_Barrier(PETSC_COMM_WORLD); 947 948 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 949 pdim[0] = dim[0]; 950 for(ctr=1;ctr<ndim;ctr++) 951 { 952 partial_dim *= dim[ctr]; 953 pdim[ctr] = dim[ctr]; 954 } 955 //#if !defined(PETSC_USE_COMPLEX) 956 // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 957 //#endif 958 959 // printf("partial dimension is %d",partial_dim); 960 if (size == 1) { 961 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 962 n = N; 963 } else { 964 ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 965 switch (ndim){ 966 case 1: 967 #if !defined(PETSC_USE_COMPLEX) 968 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 969 #else 970 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 971 n = (PetscInt)local_n0; 972 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 973 #endif 974 // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 975 break; 976 case 2: 977 #if defined(PETSC_USE_COMPLEX) 978 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 979 /* 980 PetscMPIInt rank; 981 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 982 PetscSynchronizedFlush(comm); 983 */ 984 n = (PetscInt)local_n0*dim[1]; 985 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 986 #else 987 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 988 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 989 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 990 #endif 991 break; 992 case 3: 993 // printf("The value of alloc local is %d",alloc_local); 994 #if defined(PETSC_USE_COMPLEX) 995 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 996 n = (PetscInt)local_n0*dim[1]*dim[2]; 997 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 998 #else 999 printf("The code comes here\n"); 1000 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1001 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1002 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1003 #endif 1004 break; 1005 default: 1006 #if defined(PETSC_USE_COMPLEX) 1007 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1008 // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 1009 // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 1010 n = (PetscInt)local_n0*partial_dim; 1011 // printf("New partial dimension is %d %d %d",n,N,ndim); 1012 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1013 #else 1014 temp = pdim[ndim-1]; 1015 pdim[ndim-1]= temp/2 + 1; 1016 printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 1017 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1018 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1019 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1020 pdim[ndim-1] = temp; 1021 printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 1022 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1023 #endif 1024 break; 1025 } 1026 } 1027 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1028 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1029 fft->data = (void*)fftw; 1030 1031 fft->n = n; 1032 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1033 fftw->partial_dim = partial_dim; 1034 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1035 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1036 1037 fftw->p_forward = 0; 1038 fftw->p_backward = 0; 1039 fftw->p_flag = FFTW_ESTIMATE; 1040 1041 if (size == 1){ 1042 A->ops->mult = MatMult_SeqFFTW; 1043 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1044 } else { 1045 A->ops->mult = MatMult_MPIFFTW; 1046 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1047 } 1048 fft->matdestroy = MatDestroy_FFTW; 1049 A->ops->getvecs = MatGetVecs_FFTW; 1050 A->assembled = PETSC_TRUE; 1051 #if !defined(PETSC_USE_COMPLEX) 1052 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 1053 PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1054 #endif 1055 1056 /* get runtime options */ 1057 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1058 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1059 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1060 PetscOptionsEnd(); 1061 PetscFunctionReturn(0); 1062 } 1063 EXTERN_C_END 1064 1065 1066 1067 1068