1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 28 extern PetscErrorCode MatGetVecsFFT_FFTW(Mat,Vec*,Vec*,Vec*); 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMult_SeqFFTW" 32 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 33 { 34 PetscErrorCode ierr; 35 Mat_FFT *fft = (Mat_FFT*)A->data; 36 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 37 PetscScalar *x_array,*y_array; 38 PetscInt ndim=fft->ndim,*dim=fft->dim; 39 40 PetscFunctionBegin; 41 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 42 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 43 if (!fftw->p_forward){ /* create a plan, then excute it */ 44 switch (ndim){ 45 case 1: 46 #if defined(PETSC_USE_COMPLEX) 47 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 48 #else 49 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 50 #endif 51 break; 52 case 2: 53 #if defined(PETSC_USE_COMPLEX) 54 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 55 #else 56 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 57 #endif 58 break; 59 case 3: 60 #if defined(PETSC_USE_COMPLEX) 61 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 62 #else 63 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 64 #endif 65 break; 66 default: 67 #if defined(PETSC_USE_COMPLEX) 68 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 69 #else 70 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 71 #endif 72 break; 73 } 74 fftw->finarray = x_array; 75 fftw->foutarray = y_array; 76 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 77 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 78 fftw_execute(fftw->p_forward); 79 } else { /* use existing plan */ 80 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 81 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 82 } else { 83 fftw_execute(fftw->p_forward); 84 } 85 } 86 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 87 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 93 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 94 { 95 PetscErrorCode ierr; 96 Mat_FFT *fft = (Mat_FFT*)A->data; 97 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 98 PetscScalar *x_array,*y_array; 99 PetscInt ndim=fft->ndim,*dim=fft->dim; 100 101 PetscFunctionBegin; 102 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 103 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 104 if (!fftw->p_backward){ /* create a plan, then excute it */ 105 switch (ndim){ 106 case 1: 107 #if defined(PETSC_USE_COMPLEX) 108 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 109 #else 110 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 111 #endif 112 break; 113 case 2: 114 #if defined(PETSC_USE_COMPLEX) 115 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 116 #else 117 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 118 #endif 119 break; 120 case 3: 121 #if defined(PETSC_USE_COMPLEX) 122 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 123 #else 124 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 125 #endif 126 break; 127 default: 128 #if defined(PETSC_USE_COMPLEX) 129 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 130 #else 131 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 132 #endif 133 break; 134 } 135 fftw->binarray = x_array; 136 fftw->boutarray = y_array; 137 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 138 } else { /* use existing plan */ 139 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 140 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 141 } else { 142 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 143 } 144 } 145 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 146 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 147 PetscFunctionReturn(0); 148 } 149 150 #undef __FUNCT__ 151 #define __FUNCT__ "MatMult_MPIFFTW" 152 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 153 { 154 PetscErrorCode ierr; 155 Mat_FFT *fft = (Mat_FFT*)A->data; 156 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 157 PetscScalar *x_array,*y_array; 158 PetscInt ndim=fft->ndim,*dim=fft->dim; 159 MPI_Comm comm=((PetscObject)A)->comm; 160 161 PetscFunctionBegin; 162 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 163 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 164 if (!fftw->p_forward){ /* create a plan, then excute it */ 165 switch (ndim){ 166 case 1: 167 #if defined(PETSC_USE_COMPLEX) 168 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 169 #else 170 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 171 #endif 172 break; 173 case 2: 174 #if defined(PETSC_USE_COMPLEX) 175 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 176 #else 177 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 178 #endif 179 break; 180 case 3: 181 #if defined(PETSC_USE_COMPLEX) 182 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 183 #else 184 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 185 #endif 186 break; 187 default: 188 #if defined(PETSC_USE_COMPLEX) 189 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 190 #else 191 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 192 #endif 193 break; 194 } 195 fftw->finarray = x_array; 196 fftw->foutarray = y_array; 197 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 198 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 199 fftw_execute(fftw->p_forward); 200 } else { /* use existing plan */ 201 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 202 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 203 } else { 204 fftw_execute(fftw->p_forward); 205 } 206 } 207 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 208 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 214 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 215 { 216 PetscErrorCode ierr; 217 Mat_FFT *fft = (Mat_FFT*)A->data; 218 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 219 PetscScalar *x_array,*y_array; 220 PetscInt ndim=fft->ndim,*dim=fft->dim; 221 MPI_Comm comm=((PetscObject)A)->comm; 222 223 PetscFunctionBegin; 224 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 225 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 226 if (!fftw->p_backward){ /* create a plan, then excute it */ 227 switch (ndim){ 228 case 1: 229 #if defined(PETSC_USE_COMPLEX) 230 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 231 #else 232 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 233 #endif 234 break; 235 case 2: 236 #if defined(PETSC_USE_COMPLEX) 237 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 238 #else 239 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 240 #endif 241 break; 242 case 3: 243 #if defined(PETSC_USE_COMPLEX) 244 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 245 #else 246 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 247 #endif 248 break; 249 default: 250 #if defined(PETSC_USE_COMPLEX) 251 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 252 #else 253 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 254 #endif 255 break; 256 } 257 fftw->binarray = x_array; 258 fftw->boutarray = y_array; 259 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 260 } else { /* use existing plan */ 261 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 262 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 263 } else { 264 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 265 } 266 } 267 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 268 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatDestroy_FFTW" 274 PetscErrorCode MatDestroy_FFTW(Mat A) 275 { 276 Mat_FFT *fft = (Mat_FFT*)A->data; 277 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 fftw_destroy_plan(fftw->p_forward); 282 fftw_destroy_plan(fftw->p_backward); 283 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 284 ierr = PetscFree(fft->data);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 289 #undef __FUNCT__ 290 #define __FUNCT__ "VecDestroy_MPIFFTW" 291 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 292 { 293 PetscErrorCode ierr; 294 PetscScalar *array; 295 296 PetscFunctionBegin; 297 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 298 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 299 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 300 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 301 PetscFunctionReturn(0); 302 } 303 304 #undef __FUNCT__ 305 #define __FUNCT__ "MatGetVecs1DC_FFTW" 306 /* 307 MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 308 parallel layout determined by FFTW-1D 309 310 */ 311 PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout) 312 { 313 PetscErrorCode ierr; 314 PetscMPIInt size,rank; 315 MPI_Comm comm=((PetscObject)A)->comm; 316 Mat_FFT *fft = (Mat_FFT*)A->data; 317 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 318 PetscInt N=fft->N; 319 PetscInt ndim=fft->ndim,*dim=fft->dim; 320 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 321 ptrdiff_t f_local_n1,f_local_1_end; 322 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 323 ptrdiff_t b_local_n1,b_local_1_end; 324 fftw_complex *data_fin,*data_fout,*data_bout; 325 326 PetscFunctionBegin; 327 #if !defined(PETSC_USE_COMPLEX) 328 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 329 #endif 330 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 331 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 332 if (size == 1){ 333 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 334 } 335 else { 336 if (ndim>1){ 337 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 338 else { 339 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 340 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 341 if (fin) { 342 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 343 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 344 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 345 } 346 if (fout) { 347 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 348 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 349 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 350 } 351 if (bout) { 352 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 353 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 354 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 355 } 356 } 357 if (fin){ 358 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 359 } 360 if (fout){ 361 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 362 } 363 if (bout){ 364 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 365 } 366 PetscFunctionReturn(0); 367 } 368 369 370 } 371 372 373 #undef __FUNCT__ 374 #define __FUNCT__ "MatGetVecsFFT" 375 PetscErrorCode MatGetVecsFFT(Mat A,Vec *x,Vec *y,Vec *z) 376 { 377 PetscErrorCode ierr; 378 PetscFunctionBegin; 379 ierr = PetscTryMethod(A,"MatGetVecsFFT_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 380 PetscFunctionReturn(0); 381 } 382 383 #undef __FUNCT__ 384 #define __FUNCT__ "MatGetVecsFFT_FFTW" 385 /* 386 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 387 parallel layout determined by FFTW 388 389 Collective on Mat 390 391 Input Parameter: 392 . mat - the matrix 393 394 Output Parameter: 395 + fin - (optional) input vector of forward FFTW 396 - fout - (optional) output vector of forward FFTW 397 398 Level: advanced 399 400 .seealso: MatCreateFFTW() 401 */ 402 EXTERN_C_BEGIN 403 PetscErrorCode MatGetVecsFFT_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 404 { 405 PetscErrorCode ierr; 406 PetscMPIInt size,rank; 407 MPI_Comm comm=((PetscObject)A)->comm; 408 Mat_FFT *fft = (Mat_FFT*)A->data; 409 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 410 PetscInt N=fft->N, N1, n1,vsize; 411 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 412 413 PetscFunctionBegin; 414 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 415 PetscValidType(A,1); 416 417 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 418 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 419 printf("size is %d\n",size); 420 if (size == 1){ /* sequential case */ 421 printf("Routine is getting called\n"); 422 #if defined(PETSC_USE_COMPLEX) 423 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 424 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 425 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 426 #else 427 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 428 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 429 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 430 #endif 431 } else { 432 ptrdiff_t alloc_local,local_n0,local_0_start; 433 ptrdiff_t local_n1,local_1_end; 434 fftw_complex *data_fin,*data_fout,data_bout; 435 double *data_finr,*data_boutr ; 436 ptrdiff_t local_1_start,temp; 437 switch (ndim){ 438 case 1: 439 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 440 case 2: 441 #if !defined(PETSC_USE_COMPLEX) 442 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 443 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 444 if (fin) { 445 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 446 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 447 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 448 //printf("The code comes here with vector size %d\n",vsize); 449 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 450 } 451 if (fout) { 452 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 453 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 454 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 455 } 456 if (bout) { 457 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 458 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 459 ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 460 //printf("The code comes here with vector size %d\n",vsize); 461 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 462 } 463 464 //printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 465 466 #else 467 /* Get local size */ 468 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 469 if (fin) { 470 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 471 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 472 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 473 } 474 if (fout) { 475 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 476 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 477 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 478 } 479 if (bout) { 480 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 481 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 482 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 483 } 484 485 //printf("Hope this does not come here"); 486 #endif 487 break; 488 489 case 3: 490 #if !defined(PETSC_USE_COMPLEX) 491 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 492 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 493 if (fin) { 494 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 495 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 496 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 497 //printf("The code comes here with vector size %d\n",vsize); 498 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 499 } 500 if (fout) { 501 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 502 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 503 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 504 } 505 if (bout) { 506 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 507 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 508 // ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 509 //printf("The code comes here with vector size %d\n",vsize); 510 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 511 } 512 513 //printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 514 515 #else 516 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 517 #endif 518 break; 519 default: 520 #if !defined(PETSC_USE_COMPLEX) 521 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 522 printf("The value of temp is %ld\n",temp); 523 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 524 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 525 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 526 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 527 528 if (fin) { 529 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 530 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 531 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 532 //printf("The code comes here with vector size %d\n",vsize); 533 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 534 } 535 if (fout) { 536 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 537 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 538 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 539 } 540 if (bout) { 541 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 542 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 543 //ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 544 //printf("The code comes here with vector size %d\n",vsize); 545 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 546 } 547 548 #else 549 550 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 551 #endif 552 break; 553 } 554 } 555 PetscFunctionReturn(0); 556 } 557 EXTERN_C_END 558 559 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatGetVecs_FFTW" 563 /* 564 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 565 parallel layout determined by FFTW 566 567 Collective on Mat 568 569 Input Parameter: 570 . mat - the matrix 571 572 Output Parameter: 573 + fin - (optional) input vector of forward FFTW 574 - fout - (optional) output vector of forward FFTW 575 576 Level: advanced 577 578 .seealso: MatCreateFFTW() 579 */ 580 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 581 { 582 PetscErrorCode ierr; 583 PetscMPIInt size,rank; 584 MPI_Comm comm=((PetscObject)A)->comm; 585 Mat_FFT *fft = (Mat_FFT*)A->data; 586 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 587 PetscInt N=fft->N, N1, n1,vsize; 588 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 589 590 PetscFunctionBegin; 591 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 592 PetscValidType(A,1); 593 594 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 595 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 596 if (size == 1){ /* sequential case */ 597 #if defined(PETSC_USE_COMPLEX) 598 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 599 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 600 #else 601 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 602 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 603 #endif 604 } else { /* mpi case */ 605 ptrdiff_t alloc_local,local_n0,local_0_start; 606 ptrdiff_t local_n1,local_1_end; 607 fftw_complex *data_fin,*data_fout; 608 double *data_finr ; 609 ptrdiff_t local_1_start,temp; 610 // PetscInt ctr; 611 // ptrdiff_t ndim1,*pdim; 612 // ndim1=(ptrdiff_t) ndim; 613 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 614 615 // for(ctr=0;ctr<ndim;ctr++) 616 // {k 617 // pdim[ctr] = dim[ctr]; 618 // } 619 620 621 622 switch (ndim){ 623 case 1: 624 /* Get local size */ 625 /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 626 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 627 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 628 if (fin) { 629 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 630 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 631 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 632 } 633 if (fout) { 634 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 635 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 636 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 637 } 638 break; 639 case 2: 640 #if !defined(PETSC_USE_COMPLEX) 641 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 642 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 643 if (fin) { 644 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 645 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 646 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 647 //printf("The code comes here with vector size %d\n",vsize); 648 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 649 } 650 if (fout) { 651 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 652 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 653 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 654 } 655 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 656 657 #else 658 /* Get local size */ 659 //printf("Hope this does not come here"); 660 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 661 if (fin) { 662 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 663 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 664 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 665 } 666 if (fout) { 667 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 668 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 669 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 670 } 671 //printf("Hope this does not come here"); 672 #endif 673 break; 674 case 3: 675 /* Get local size */ 676 #if !defined(PETSC_USE_COMPLEX) 677 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 678 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 679 if (fin) { 680 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 681 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 682 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 683 //printf("The code comes here with vector size %d\n",vsize); 684 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 685 } 686 if (fout) { 687 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 688 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 689 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 690 } 691 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 692 693 694 #else 695 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 696 // printf("The quantity n is %d",n); 697 if (fin) { 698 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 699 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 700 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 701 } 702 if (fout) { 703 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 704 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 705 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 706 } 707 #endif 708 break; 709 default: 710 /* Get local size */ 711 #if !defined(PETSC_USE_COMPLEX) 712 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 713 printf("The value of temp is %ld\n",temp); 714 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 715 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 716 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 717 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 718 if (fin) { 719 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 720 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 721 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 722 //printf("The code comes here with vector size %d\n",vsize); 723 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 724 } 725 if (fout) { 726 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 727 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 728 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 729 } 730 731 #else 732 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 733 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 734 // printf("The value of alloc local is %d",alloc_local); 735 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 736 // for(i=0;i<ndim;i++) 737 // { 738 // pdim[i]=dim[i];printf("%d",pdim[i]); 739 // } 740 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 741 // printf("The quantity n is %d",n); 742 if (fin) { 743 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 744 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 745 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 746 } 747 if (fout) { 748 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 749 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 750 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 751 } 752 #endif 753 break; 754 } 755 } 756 // if (fin){ 757 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 758 // } 759 // if (fout){ 760 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 761 // } 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "InputTransformFFT" 767 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 768 { 769 PetscErrorCode ierr; 770 PetscFunctionBegin; 771 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 772 PetscFunctionReturn(0); 773 } 774 775 /* 776 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 777 Input A, x, y 778 A - FFTW matrix 779 x - user data 780 Options Database Keys: 781 + -mat_fftw_plannerflags - set FFTW planner flags 782 783 Level: intermediate 784 785 */ 786 787 EXTERN_C_BEGIN 788 #undef __FUNCT__ 789 #define __FUNCT__ "InputTransformFFT_FTTW" 790 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 791 { 792 PetscErrorCode ierr; 793 MPI_Comm comm=((PetscObject)A)->comm; 794 Mat_FFT *fft = (Mat_FFT*)A->data; 795 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 796 PetscInt N=fft->N, N1, n1 ,NM; 797 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 798 PetscInt low, *indx1, *indx2, tempindx, tempindx1; 799 PetscInt i,j,k,rank,size,partial_dim; 800 ptrdiff_t alloc_local,local_n0,local_0_start; 801 ptrdiff_t local_n1,local_1_start,temp; 802 VecScatter vecscat; 803 IS list1,list2; 804 805 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 806 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 807 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 808 printf("Local ownership starts at %d\n",low); 809 810 if (size==1) 811 { 812 /* switch (ndim){ 813 case 1: 814 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 815 for (i=0;i<dim[0];i++) 816 { 817 indx1[i] = i; 818 } 819 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 820 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 821 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 822 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 823 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 824 ierr = ISDestroy(&list1);CHKERRQ(ierr); 825 ierr = PetscFree(indx1);CHKERRQ(ierr); 826 break; 827 828 case 2: 829 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 830 for (i=0;i<dim[0];i++){ 831 for (j=0;j<dim[1];j++){ 832 indx1[i*dim[1]+j] = i*dim[1] + j; 833 } 834 } 835 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 836 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 837 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 838 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 839 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 840 ierr = ISDestroy(&list1);CHKERRQ(ierr); 841 ierr = PetscFree(indx1);CHKERRQ(ierr); 842 break; 843 case 3: 844 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 845 for (i=0;i<dim[0];i++){ 846 for (j=0;j<dim[1];j++){ 847 for (k=0;k<dim[2];k++){ 848 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 849 } 850 } 851 } 852 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 853 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 854 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 855 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 856 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 857 ierr = ISDestroy(&list1);CHKERRQ(ierr); 858 ierr = PetscFree(indx1);CHKERRQ(ierr); 859 break; 860 default: 861 */ 862 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1); 863 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 864 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 866 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 867 ierr = ISDestroy(&list1);CHKERRQ(ierr); 868 //ierr = ISDestroy(list1);CHKERRQ(ierr); 869 // break; 870 // } 871 } 872 873 else{ 874 875 switch (ndim){ 876 case 1: 877 #if defined(PETSC_USE_COMPLEX) 878 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 879 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 880 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 881 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 882 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 883 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 884 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 887 ierr = ISDestroy(&list1);CHKERRQ(ierr); 888 ierr = ISDestroy(&list2);CHKERRQ(ierr); 889 break; 890 #else 891 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 892 #endif 893 break; 894 case 2: 895 #if defined(PETSC_USE_COMPLEX) 896 //PetscInt my_value; 897 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 898 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 899 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 900 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 901 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 902 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 903 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 906 ierr = ISDestroy(&list1);CHKERRQ(ierr); 907 ierr = ISDestroy(&list2);CHKERRQ(ierr); 908 break; 909 #else 910 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 911 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 912 913 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 914 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 915 printf("Val local_0_start = %ld\n",local_0_start); 916 917 if (dim[1]%2==0) 918 NM = dim[1]+2; 919 else 920 NM = dim[1]+1; 921 922 for (i=0;i<local_n0;i++){ 923 for (j=0;j<dim[1];j++){ 924 tempindx = i*dim[1] + j; 925 tempindx1 = i*NM + j; 926 indx1[tempindx]=local_0_start*dim[1]+tempindx; 927 indx2[tempindx]=low+tempindx1; 928 // printf("Val tempindx1 = %d\n",tempindx1); 929 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 930 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 931 // printf("-------------------------\n",indx2[tempindx],rank); 932 } 933 } 934 935 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 936 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 937 938 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 939 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 942 ierr = ISDestroy(&list1);CHKERRQ(ierr); 943 ierr = ISDestroy(&list2);CHKERRQ(ierr); 944 ierr = PetscFree(indx1);CHKERRQ(ierr); 945 ierr = PetscFree(indx2);CHKERRQ(ierr); 946 break; 947 #endif 948 949 case 3: 950 #if defined(PETSC_USE_COMPLEX) 951 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 952 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 953 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 954 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 955 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 956 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 957 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 958 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 960 ierr = ISDestroy(&list1);CHKERRQ(ierr); 961 ierr = ISDestroy(&list2);CHKERRQ(ierr); 962 break; 963 #else 964 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 965 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 966 967 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 968 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 969 printf("Val local_0_start = %ld\n",local_0_start); 970 971 if (dim[2]%2==0) 972 NM = dim[2]+2; 973 else 974 NM = dim[2]+1; 975 976 for (i=0;i<local_n0;i++){ 977 for (j=0;j<dim[1];j++){ 978 for (k=0;k<dim[2];k++){ 979 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 980 tempindx1 = i*dim[1]*NM + j*NM + k; 981 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 982 indx2[tempindx]=low+tempindx1; 983 } 984 // printf("Val tempindx1 = %d\n",tempindx1); 985 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 986 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 987 // printf("-------------------------\n",indx2[tempindx],rank); 988 } 989 } 990 991 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 992 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 993 994 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 995 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 996 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 997 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 998 ierr = ISDestroy(&list1);CHKERRQ(ierr); 999 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1000 ierr = PetscFree(indx1);CHKERRQ(ierr); 1001 ierr = PetscFree(indx2);CHKERRQ(ierr); 1002 break; 1003 #endif 1004 1005 default: 1006 #if defined(PETSC_USE_COMPLEX) 1007 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1008 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1009 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1010 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1011 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1012 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1013 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1015 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1016 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1017 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1018 1019 1020 #else 1021 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1022 printf("The value of temp is %ld\n",temp); 1023 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1024 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1025 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1026 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1027 1028 partial_dim = fftw->partial_dim; 1029 printf("The value of partial dim is %d\n",partial_dim); 1030 1031 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1032 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1033 printf("Val local_0_start = %ld\n",local_0_start); 1034 1035 if (dim[ndim-1]%2==0) 1036 NM = 2; 1037 else 1038 NM = 1; 1039 1040 j = low; 1041 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1042 { 1043 indx1[i] = local_0_start*partial_dim + i; 1044 indx2[i] = j; 1045 //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 1046 if (k%dim[ndim-1]==0) 1047 { j+=NM;} 1048 j++; 1049 } 1050 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1051 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1052 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1053 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1054 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1055 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1056 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1057 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1058 ierr = PetscFree(indx1);CHKERRQ(ierr); 1059 ierr = PetscFree(indx2);CHKERRQ(ierr); 1060 break; 1061 #endif 1062 } 1063 } 1064 1065 return 0; 1066 } 1067 EXTERN_C_END 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "OutputTransformFFT" 1071 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 1072 { 1073 PetscErrorCode ierr; 1074 PetscFunctionBegin; 1075 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 /* 1080 OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 1081 Input A, x, y 1082 A - FFTW matrix 1083 x - FFTW vector 1084 y - PETSc vector that user can read 1085 Options Database Keys: 1086 + -mat_fftw_plannerflags - set FFTW planner flags 1087 1088 Level: intermediate 1089 1090 */ 1091 1092 EXTERN_C_BEGIN 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "OutputTransformFFT_FTTW" 1095 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 1096 { 1097 PetscErrorCode ierr; 1098 MPI_Comm comm=((PetscObject)A)->comm; 1099 Mat_FFT *fft = (Mat_FFT*)A->data; 1100 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 1101 PetscInt N=fft->N, N1, n1 ,NM; 1102 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 1103 PetscInt low, *indx1, *indx2, tempindx, tempindx1; 1104 PetscInt i,j,k,rank,size,partial_dim; 1105 ptrdiff_t alloc_local,local_n0,local_0_start; 1106 ptrdiff_t local_n1,local_1_start,temp; 1107 VecScatter vecscat; 1108 IS list1,list2; 1109 1110 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1111 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1112 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 1113 1114 if (size==1){ 1115 /* 1116 switch (ndim){ 1117 case 1: 1118 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 1119 for (i=0;i<dim[0];i++) 1120 { 1121 indx1[i] = i; 1122 } 1123 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1124 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1125 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1126 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1127 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1128 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1129 ierr = PetscFree(indx1);CHKERRQ(ierr); 1130 break; 1131 1132 case 2: 1133 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 1134 for (i=0;i<dim[0];i++){ 1135 for (j=0;j<dim[1];j++){ 1136 indx1[i*dim[1]+j] = i*dim[1] + j; 1137 } 1138 } 1139 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1140 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1141 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1143 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1144 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1145 ierr = PetscFree(indx1);CHKERRQ(ierr); 1146 break; 1147 case 3: 1148 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1149 for (i=0;i<dim[0];i++){ 1150 for (j=0;j<dim[1];j++){ 1151 for (k=0;k<dim[2];k++){ 1152 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 1153 } 1154 } 1155 } 1156 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1157 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1158 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1159 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1160 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1161 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1162 ierr = PetscFree(indx1);CHKERRQ(ierr); 1163 break; 1164 default: 1165 */ 1166 ierr = ISCreateStride(comm,N,0,1,&list1); 1167 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1168 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1169 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1170 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1171 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1172 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1173 // break; 1174 // } 1175 } 1176 else{ 1177 1178 switch (ndim){ 1179 case 1: 1180 #if defined(PETSC_USE_COMPLEX) 1181 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1182 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 1183 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 1184 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1185 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1186 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1187 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1188 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1189 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1190 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1191 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1192 break; 1193 1194 #else 1195 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 1196 #endif 1197 break; 1198 case 2: 1199 #if defined(PETSC_USE_COMPLEX) 1200 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1201 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1202 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1203 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1204 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1205 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1207 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1208 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1209 break; 1210 #else 1211 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1212 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 1213 1214 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1215 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1216 printf("Val local_0_start = %ld\n",local_0_start); 1217 1218 if (dim[1]%2==0) 1219 NM = dim[1]+2; 1220 else 1221 NM = dim[1]+1; 1222 1223 1224 1225 for (i=0;i<local_n0;i++){ 1226 for (j=0;j<dim[1];j++){ 1227 tempindx = i*dim[1] + j; 1228 tempindx1 = i*NM + j; 1229 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1230 indx2[tempindx]=low+tempindx1; 1231 // printf("Val tempindx1 = %d\n",tempindx1); 1232 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1233 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1234 // printf("-------------------------\n",indx2[tempindx],rank); 1235 } 1236 } 1237 1238 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1239 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1240 1241 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1242 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1243 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1245 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1246 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1247 ierr = PetscFree(indx1);CHKERRQ(ierr); 1248 ierr = PetscFree(indx2);CHKERRQ(ierr); 1249 break; 1250 #endif 1251 case 3: 1252 #if defined(PETSC_USE_COMPLEX) 1253 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1254 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1255 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1256 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1257 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1258 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1259 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1262 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1263 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1264 break; 1265 #else 1266 1267 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1268 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1269 1270 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1271 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1272 printf("Val local_0_start = %ld\n",local_0_start); 1273 1274 if (dim[2]%2==0) 1275 NM = dim[2]+2; 1276 else 1277 NM = dim[2]+1; 1278 1279 for (i=0;i<local_n0;i++){ 1280 for (j=0;j<dim[1];j++){ 1281 for (k=0;k<dim[2];k++){ 1282 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1283 tempindx1 = i*dim[1]*NM + j*NM + k; 1284 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1285 indx2[tempindx]=low+tempindx1; 1286 } 1287 // printf("Val tempindx1 = %d\n",tempindx1); 1288 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1289 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1290 // printf("-------------------------\n",indx2[tempindx],rank); 1291 } 1292 } 1293 1294 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1295 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1296 1297 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1298 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1299 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1300 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1301 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1302 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1303 ierr = PetscFree(indx1);CHKERRQ(ierr); 1304 ierr = PetscFree(indx2);CHKERRQ(ierr); 1305 break; 1306 #endif 1307 default: 1308 #if defined(PETSC_USE_COMPLEX) 1309 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1310 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1311 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1312 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1313 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1314 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1315 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1316 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1317 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1318 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1319 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1320 #else 1321 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1322 printf("The value of temp is %ld\n",temp); 1323 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1324 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1325 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1326 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1327 1328 partial_dim = fftw->partial_dim; 1329 printf("The value of partial dim is %d\n",partial_dim); 1330 1331 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1332 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1333 printf("Val local_0_start = %ld\n",local_0_start); 1334 1335 if (dim[ndim-1]%2==0) 1336 NM = 2; 1337 else 1338 NM = 1; 1339 1340 j = low; 1341 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1342 { 1343 indx1[i] = local_0_start*partial_dim + i; 1344 indx2[i] = j; 1345 //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 1346 if (k%dim[ndim-1]==0) 1347 { j+=NM;} 1348 j++; 1349 } 1350 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1351 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1352 1353 //ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1354 1355 1356 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1357 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1358 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1359 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1360 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1361 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1362 ierr = PetscFree(indx1);CHKERRQ(ierr); 1363 ierr = PetscFree(indx2);CHKERRQ(ierr); 1364 1365 break; 1366 #endif 1367 } 1368 } 1369 return 0; 1370 } 1371 EXTERN_C_END 1372 1373 EXTERN_C_BEGIN 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatCreate_FFTW" 1376 /* 1377 MatCreate_FFTW - Creates a matrix object that provides FFT 1378 via the external package FFTW 1379 Options Database Keys: 1380 + -mat_fftw_plannerflags - set FFTW planner flags 1381 1382 Level: intermediate 1383 1384 */ 1385 1386 PetscErrorCode MatCreate_FFTW(Mat A) 1387 { 1388 PetscErrorCode ierr; 1389 MPI_Comm comm=((PetscObject)A)->comm; 1390 Mat_FFT *fft=(Mat_FFT*)A->data; 1391 Mat_FFTW *fftw; 1392 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1393 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1394 PetscBool flg; 1395 PetscInt p_flag,partial_dim=1,ctr,N1; 1396 PetscMPIInt size,rank; 1397 ptrdiff_t *pdim, temp; 1398 ptrdiff_t local_n1,local_1_start,local_1_end; 1399 1400 PetscFunctionBegin; 1401 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1402 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1403 ierr = MPI_Barrier(PETSC_COMM_WORLD); 1404 1405 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1406 pdim[0] = dim[0]; 1407 for(ctr=1;ctr<ndim;ctr++) 1408 { 1409 partial_dim *= dim[ctr]; 1410 pdim[ctr] = dim[ctr]; 1411 } 1412 1413 if (size == 1) { 1414 #if defined(PETSC_USE_COMPLEX) 1415 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1416 n = N; 1417 #else 1418 int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1419 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1420 n = tot_dim; 1421 #endif 1422 1423 } else { 1424 ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end; 1425 switch (ndim){ 1426 case 1: 1427 #if !defined(PETSC_USE_COMPLEX) 1428 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1429 #else 1430 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 1431 n = (PetscInt)local_n0; 1432 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1433 #endif 1434 // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 1435 break; 1436 case 2: 1437 #if defined(PETSC_USE_COMPLEX) 1438 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1439 /* 1440 PetscMPIInt rank; 1441 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1442 PetscSynchronizedFlush(comm); 1443 */ 1444 n = (PetscInt)local_n0*dim[1]; 1445 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1446 #else 1447 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1448 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1449 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1450 #endif 1451 break; 1452 case 3: 1453 // printf("The value of alloc local is %d",alloc_local); 1454 #if defined(PETSC_USE_COMPLEX) 1455 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1456 n = (PetscInt)local_n0*dim[1]*dim[2]; 1457 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1458 #else 1459 printf("The code comes here\n"); 1460 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1461 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1462 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1463 #endif 1464 break; 1465 default: 1466 #if defined(PETSC_USE_COMPLEX) 1467 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1468 // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 1469 // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 1470 n = (PetscInt)local_n0*partial_dim; 1471 // printf("New partial dimension is %d %d %d",n,N,ndim); 1472 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1473 #else 1474 temp = pdim[ndim-1]; 1475 pdim[ndim-1]= temp/2 + 1; 1476 printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 1477 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1478 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1479 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1480 pdim[ndim-1] = temp; 1481 printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 1482 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1483 #endif 1484 break; 1485 } 1486 } 1487 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1488 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1489 fft->data = (void*)fftw; 1490 1491 fft->n = n; 1492 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1493 fftw->partial_dim = partial_dim; 1494 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1495 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1496 1497 fftw->p_forward = 0; 1498 fftw->p_backward = 0; 1499 fftw->p_flag = FFTW_ESTIMATE; 1500 1501 if (size == 1){ 1502 A->ops->mult = MatMult_SeqFFTW; 1503 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1504 } else { 1505 A->ops->mult = MatMult_MPIFFTW; 1506 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1507 } 1508 fft->matdestroy = MatDestroy_FFTW; 1509 // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 1510 A->ops->getvecs = MatGetVecs_FFTW; 1511 A->assembled = PETSC_TRUE; 1512 PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFT_C","MatGetVecsFFT_FFTW",MatGetVecsFFT_FFTW); 1513 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 1514 PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1515 1516 /* get runtime options */ 1517 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1518 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1519 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1520 PetscOptionsEnd(); 1521 PetscFunctionReturn(0); 1522 } 1523 EXTERN_C_END 1524 1525 1526 1527 1528