1c4762a1bSJed Brown static char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown /* 4c4762a1bSJed Brown Usage: 5c4762a1bSJed Brown mpiexec -n <np> ./ex158 -use_FFTW_interface NO 6c4762a1bSJed Brown mpiexec -n <np> ./ex158 -use_FFTW_interface YES 7c4762a1bSJed Brown */ 8c4762a1bSJed Brown 9c4762a1bSJed Brown #include <petscmat.h> 10c4762a1bSJed Brown #include <fftw3-mpi.h> 11c4762a1bSJed Brown 12c4762a1bSJed Brown int main(int argc,char **args) 13c4762a1bSJed Brown { 14c4762a1bSJed Brown PetscErrorCode ierr; 15c4762a1bSJed Brown PetscMPIInt rank,size; 16c4762a1bSJed Brown PetscInt N0=50,N1=20,N=N0*N1; 17c4762a1bSJed Brown PetscRandom rdm; 18c4762a1bSJed Brown PetscScalar a; 19c4762a1bSJed Brown PetscReal enorm; 20c4762a1bSJed Brown Vec x,y,z; 21c4762a1bSJed Brown PetscBool view=PETSC_FALSE,use_interface=PETSC_TRUE; 22c4762a1bSJed Brown 23c4762a1bSJed Brown ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 24c4762a1bSJed Brown #if defined(PETSC_USE_COMPLEX) 25c4762a1bSJed Brown SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex"); 26c4762a1bSJed Brown #endif 27c4762a1bSJed Brown 28c4762a1bSJed Brown ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex158");CHKERRQ(ierr); 29*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex158",use_interface, &use_interface, NULL)); 30c4762a1bSJed Brown ierr = PetscOptionsEnd();CHKERRQ(ierr); 31c4762a1bSJed Brown 32*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 33*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 34c4762a1bSJed Brown 35*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomCreate(PETSC_COMM_WORLD, &rdm)); 36*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomSetFromOptions(rdm)); 37c4762a1bSJed Brown 38c4762a1bSJed Brown if (!use_interface) { 39c4762a1bSJed Brown /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */ 40c4762a1bSJed Brown /*---------------------------------------------------------*/ 41c4762a1bSJed Brown fftw_plan fplan,bplan; 42c4762a1bSJed Brown fftw_complex *data_in,*data_out,*data_out2; 43c4762a1bSJed Brown ptrdiff_t alloc_local,local_n0,local_0_start; 44c4762a1bSJed Brown 45dd400576SPatrick Sanan if (rank == 0) printf("Use FFTW without PETSc-FFTW interface\n"); 46c4762a1bSJed Brown fftw_mpi_init(); 47c4762a1bSJed Brown N = N0*N1; 48c4762a1bSJed Brown alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_n0,&local_0_start); 49c4762a1bSJed Brown 50c4762a1bSJed Brown data_in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 51c4762a1bSJed Brown data_out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 52c4762a1bSJed Brown data_out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 53c4762a1bSJed Brown 54*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_in,&x)); 55*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject) x, "Real Space vector")); 56*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out,&y)); 57*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject) y, "Frequency space vector")); 58*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out2,&z)); 59*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject) z, "Reconstructed vector")); 60c4762a1bSJed Brown 61c4762a1bSJed Brown fplan = fftw_mpi_plan_dft_2d(N0,N1,data_in,data_out,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE); 62c4762a1bSJed Brown bplan = fftw_mpi_plan_dft_2d(N0,N1,data_out,data_out2,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE); 63c4762a1bSJed Brown 64*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecSetRandom(x, rdm)); 65*5f80ce2aSJacob Faibussowitsch if (view) CHKERRQ(VecView(x,PETSC_VIEWER_STDOUT_WORLD)); 66c4762a1bSJed Brown 67c4762a1bSJed Brown fftw_execute(fplan); 68*5f80ce2aSJacob Faibussowitsch if (view) CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 69c4762a1bSJed Brown 70c4762a1bSJed Brown fftw_execute(bplan); 71c4762a1bSJed Brown 72c4762a1bSJed Brown /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ 73c4762a1bSJed Brown a = 1.0/(PetscReal)N; 74*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScale(z,a)); 75*5f80ce2aSJacob Faibussowitsch if (view) CHKERRQ(VecView(z, PETSC_VIEWER_STDOUT_WORLD)); 76*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecAXPY(z,-1.0,x)); 77*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecNorm(z,NORM_1,&enorm)); 78c4762a1bSJed Brown if (enorm > 1.e-11) { 79*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %g\n",(double)enorm)); 80c4762a1bSJed Brown } 81c4762a1bSJed Brown 82c4762a1bSJed Brown /* Free spaces */ 83c4762a1bSJed Brown fftw_destroy_plan(fplan); 84c4762a1bSJed Brown fftw_destroy_plan(bplan); 85*5f80ce2aSJacob Faibussowitsch fftw_free(data_in); CHKERRQ(VecDestroy(&x)); 86*5f80ce2aSJacob Faibussowitsch fftw_free(data_out); CHKERRQ(VecDestroy(&y)); 87*5f80ce2aSJacob Faibussowitsch fftw_free(data_out2);CHKERRQ(VecDestroy(&z)); 88c4762a1bSJed Brown 89c4762a1bSJed Brown } else { 90c4762a1bSJed Brown /* Use PETSc-FFTW interface */ 91c4762a1bSJed Brown /*-------------------------------------------*/ 92c4762a1bSJed Brown PetscInt i,*dim,k,DIM; 93c4762a1bSJed Brown Mat A; 94c4762a1bSJed Brown Vec input,output; 95c4762a1bSJed Brown 96c4762a1bSJed Brown N=30; 97c4762a1bSJed Brown for (i=2; i<3; i++) { /* (i=3,4: -- error in VecScatterPetscToFFTW(A,input,x); */ 98c4762a1bSJed Brown DIM = i; 99*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(i,&dim)); 100c4762a1bSJed Brown for (k=0; k<i; k++) { 101c4762a1bSJed Brown dim[k]=30; 102c4762a1bSJed Brown } 103c4762a1bSJed Brown N *= dim[i-1]; 104c4762a1bSJed Brown 105c4762a1bSJed Brown /* Create FFTW object */ 106dd400576SPatrick Sanan if (rank == 0) { 107*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(PETSC_COMM_SELF,"Use PETSc-FFTW interface...%d-DIM:%d \n",DIM,N)); 108c4762a1bSJed Brown } 109*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateFFT(PETSC_COMM_WORLD,DIM,dim,MATFFTW,&A)); 110c4762a1bSJed Brown 111c4762a1bSJed Brown /* Create FFTW vectors that are compatible with parallel layout of A */ 112*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateVecsFFTW(A,&x,&y,&z)); 113*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject) x, "Real space vector")); 114*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject) y, "Frequency space vector")); 115*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject) z, "Reconstructed vector")); 116c4762a1bSJed Brown 117c4762a1bSJed Brown /* Create and set PETSc vector */ 118*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreate(PETSC_COMM_WORLD,&input)); 119*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecSetSizes(input,PETSC_DECIDE,N)); 120*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecSetFromOptions(input)); 121*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecSetRandom(input,rdm)); 122*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDuplicate(input,&output)); 123*5f80ce2aSJacob Faibussowitsch if (view) CHKERRQ(VecView(input,PETSC_VIEWER_STDOUT_WORLD)); 124c4762a1bSJed Brown 125c4762a1bSJed Brown /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data 126c4762a1bSJed Brown can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original 127c4762a1bSJed Brown data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel 128c4762a1bSJed Brown layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically 129c4762a1bSJed Brown at the end of last dimension. This padding is required by FFTW to perform parallel real D.F.T. */ 130*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterPetscToFFTW(A,input,x));/* buggy for dim = 3, 4... */ 131c4762a1bSJed Brown 132c4762a1bSJed Brown /* Apply FFTW_FORWARD and FFTW_BACKWARD */ 133*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMult(A,x,y)); 134*5f80ce2aSJacob Faibussowitsch if (view) CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 135*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMultTranspose(A,y,z)); 136c4762a1bSJed Brown 137c4762a1bSJed Brown /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc 138c4762a1bSJed Brown performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of 139c4762a1bSJed Brown the extra spaces that were artificially padded to perform real parallel transform. */ 140*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterFFTWToPetsc(A,z,output)); 141c4762a1bSJed Brown 142c4762a1bSJed Brown /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ 143c4762a1bSJed Brown a = 1.0/(PetscReal)N; 144*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScale(output,a)); 145*5f80ce2aSJacob Faibussowitsch if (view) CHKERRQ(VecView(output,PETSC_VIEWER_STDOUT_WORLD)); 146*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecAXPY(output,-1.0,input)); 147*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecNorm(output,NORM_1,&enorm)); 148dd400576SPatrick Sanan if (enorm > 1.e-09 && rank == 0) { 149*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %e\n",enorm)); 150c4762a1bSJed Brown } 151c4762a1bSJed Brown 152c4762a1bSJed Brown /* Free spaces */ 153*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(dim)); 154*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&input)); 155*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&output)); 156*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&x)); 157*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&y)); 158*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&z)); 159*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A)); 160c4762a1bSJed Brown } 161c4762a1bSJed Brown } 162*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscRandomDestroy(&rdm)); 163c4762a1bSJed Brown ierr = PetscFinalize(); 164c4762a1bSJed Brown return ierr; 165c4762a1bSJed Brown } 166c4762a1bSJed Brown 167c4762a1bSJed Brown /*TEST 168c4762a1bSJed Brown 169c4762a1bSJed Brown build: 1700cf2e031SBarry Smith requires: !mpiuni fftw !complex 171c4762a1bSJed Brown 172c4762a1bSJed Brown test: 173c4762a1bSJed Brown output_file: output/ex158.out 174c4762a1bSJed Brown 175c4762a1bSJed Brown test: 176c4762a1bSJed Brown suffix: 2 177c4762a1bSJed Brown nsize: 3 178c4762a1bSJed Brown 179c4762a1bSJed Brown TEST*/ 180