1*c4762a1bSJed Brown static char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n"; 2*c4762a1bSJed Brown 3*c4762a1bSJed Brown /* 4*c4762a1bSJed Brown Usage: 5*c4762a1bSJed Brown mpiexec -n <np> ./ex158 -use_FFTW_interface NO 6*c4762a1bSJed Brown mpiexec -n <np> ./ex158 -use_FFTW_interface YES 7*c4762a1bSJed Brown */ 8*c4762a1bSJed Brown 9*c4762a1bSJed Brown #include <petscmat.h> 10*c4762a1bSJed Brown #include <fftw3-mpi.h> 11*c4762a1bSJed Brown 12*c4762a1bSJed Brown int main(int argc,char **args) 13*c4762a1bSJed Brown { 14*c4762a1bSJed Brown PetscErrorCode ierr; 15*c4762a1bSJed Brown PetscMPIInt rank,size; 16*c4762a1bSJed Brown PetscInt N0=50,N1=20,N=N0*N1; 17*c4762a1bSJed Brown PetscRandom rdm; 18*c4762a1bSJed Brown PetscScalar a; 19*c4762a1bSJed Brown PetscReal enorm; 20*c4762a1bSJed Brown Vec x,y,z; 21*c4762a1bSJed Brown PetscBool view=PETSC_FALSE,use_interface=PETSC_TRUE; 22*c4762a1bSJed Brown 23*c4762a1bSJed Brown ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 24*c4762a1bSJed Brown #if defined(PETSC_USE_COMPLEX) 25*c4762a1bSJed Brown SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex"); 26*c4762a1bSJed Brown #endif 27*c4762a1bSJed Brown 28*c4762a1bSJed Brown ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex158");CHKERRQ(ierr); 29*c4762a1bSJed Brown ierr = PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex158",use_interface, &use_interface, NULL);CHKERRQ(ierr); 30*c4762a1bSJed Brown ierr = PetscOptionsEnd();CHKERRQ(ierr); 31*c4762a1bSJed Brown 32*c4762a1bSJed Brown ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 33*c4762a1bSJed Brown ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 34*c4762a1bSJed Brown 35*c4762a1bSJed Brown ierr = PetscRandomCreate(PETSC_COMM_WORLD, &rdm);CHKERRQ(ierr); 36*c4762a1bSJed Brown ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); 37*c4762a1bSJed Brown 38*c4762a1bSJed Brown if (!use_interface) { 39*c4762a1bSJed Brown /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */ 40*c4762a1bSJed Brown /*---------------------------------------------------------*/ 41*c4762a1bSJed Brown fftw_plan fplan,bplan; 42*c4762a1bSJed Brown fftw_complex *data_in,*data_out,*data_out2; 43*c4762a1bSJed Brown ptrdiff_t alloc_local,local_n0,local_0_start; 44*c4762a1bSJed Brown 45*c4762a1bSJed Brown if (!rank) printf("Use FFTW without PETSc-FFTW interface\n"); 46*c4762a1bSJed Brown fftw_mpi_init(); 47*c4762a1bSJed Brown N = N0*N1; 48*c4762a1bSJed Brown alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_n0,&local_0_start); 49*c4762a1bSJed Brown 50*c4762a1bSJed Brown data_in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 51*c4762a1bSJed Brown data_out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 52*c4762a1bSJed Brown data_out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 53*c4762a1bSJed Brown 54*c4762a1bSJed Brown ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_in,&x);CHKERRQ(ierr); 55*c4762a1bSJed Brown ierr = PetscObjectSetName((PetscObject) x, "Real Space vector");CHKERRQ(ierr); 56*c4762a1bSJed Brown ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out,&y);CHKERRQ(ierr); 57*c4762a1bSJed Brown ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr); 58*c4762a1bSJed Brown ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out2,&z);CHKERRQ(ierr); 59*c4762a1bSJed Brown ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr); 60*c4762a1bSJed Brown 61*c4762a1bSJed Brown fplan = fftw_mpi_plan_dft_2d(N0,N1,data_in,data_out,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE); 62*c4762a1bSJed Brown bplan = fftw_mpi_plan_dft_2d(N0,N1,data_out,data_out2,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE); 63*c4762a1bSJed Brown 64*c4762a1bSJed Brown ierr = VecSetRandom(x, rdm);CHKERRQ(ierr); 65*c4762a1bSJed Brown if (view) {ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} 66*c4762a1bSJed Brown 67*c4762a1bSJed Brown fftw_execute(fplan); 68*c4762a1bSJed Brown if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} 69*c4762a1bSJed Brown 70*c4762a1bSJed Brown fftw_execute(bplan); 71*c4762a1bSJed Brown 72*c4762a1bSJed Brown /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ 73*c4762a1bSJed Brown a = 1.0/(PetscReal)N; 74*c4762a1bSJed Brown ierr = VecScale(z,a);CHKERRQ(ierr); 75*c4762a1bSJed Brown if (view) {ierr = VecView(z, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} 76*c4762a1bSJed Brown ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr); 77*c4762a1bSJed Brown ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr); 78*c4762a1bSJed Brown if (enorm > 1.e-11) { 79*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %g\n",(double)enorm);CHKERRQ(ierr); 80*c4762a1bSJed Brown } 81*c4762a1bSJed Brown 82*c4762a1bSJed Brown /* Free spaces */ 83*c4762a1bSJed Brown fftw_destroy_plan(fplan); 84*c4762a1bSJed Brown fftw_destroy_plan(bplan); 85*c4762a1bSJed Brown fftw_free(data_in); ierr = VecDestroy(&x);CHKERRQ(ierr); 86*c4762a1bSJed Brown fftw_free(data_out); ierr = VecDestroy(&y);CHKERRQ(ierr); 87*c4762a1bSJed Brown fftw_free(data_out2);ierr = VecDestroy(&z);CHKERRQ(ierr); 88*c4762a1bSJed Brown 89*c4762a1bSJed Brown } else { 90*c4762a1bSJed Brown /* Use PETSc-FFTW interface */ 91*c4762a1bSJed Brown /*-------------------------------------------*/ 92*c4762a1bSJed Brown PetscInt i,*dim,k,DIM; 93*c4762a1bSJed Brown Mat A; 94*c4762a1bSJed Brown Vec input,output; 95*c4762a1bSJed Brown 96*c4762a1bSJed Brown N=30; 97*c4762a1bSJed Brown for (i=2; i<3; i++) { /* (i=3,4: -- error in VecScatterPetscToFFTW(A,input,x); */ 98*c4762a1bSJed Brown DIM = i; 99*c4762a1bSJed Brown ierr = PetscMalloc1(i,&dim);CHKERRQ(ierr); 100*c4762a1bSJed Brown for (k=0; k<i; k++) { 101*c4762a1bSJed Brown dim[k]=30; 102*c4762a1bSJed Brown } 103*c4762a1bSJed Brown N *= dim[i-1]; 104*c4762a1bSJed Brown 105*c4762a1bSJed Brown /* Create FFTW object */ 106*c4762a1bSJed Brown if (!rank) { 107*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_SELF,"Use PETSc-FFTW interface...%d-DIM:%d \n",DIM,N);CHKERRQ(ierr); 108*c4762a1bSJed Brown } 109*c4762a1bSJed Brown ierr = MatCreateFFT(PETSC_COMM_WORLD,DIM,dim,MATFFTW,&A);CHKERRQ(ierr); 110*c4762a1bSJed Brown 111*c4762a1bSJed Brown /* Create FFTW vectors that are compatible with parallel layout of A */ 112*c4762a1bSJed Brown ierr = MatCreateVecsFFTW(A,&x,&y,&z);CHKERRQ(ierr); 113*c4762a1bSJed Brown ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr); 114*c4762a1bSJed Brown ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr); 115*c4762a1bSJed Brown ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr); 116*c4762a1bSJed Brown 117*c4762a1bSJed Brown /* Create and set PETSc vector */ 118*c4762a1bSJed Brown ierr = VecCreate(PETSC_COMM_WORLD,&input);CHKERRQ(ierr); 119*c4762a1bSJed Brown ierr = VecSetSizes(input,PETSC_DECIDE,N);CHKERRQ(ierr); 120*c4762a1bSJed Brown ierr = VecSetFromOptions(input);CHKERRQ(ierr); 121*c4762a1bSJed Brown ierr = VecSetRandom(input,rdm);CHKERRQ(ierr); 122*c4762a1bSJed Brown ierr = VecDuplicate(input,&output);CHKERRQ(ierr); 123*c4762a1bSJed Brown if (view) {ierr = VecView(input,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} 124*c4762a1bSJed Brown 125*c4762a1bSJed Brown /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data 126*c4762a1bSJed Brown can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original 127*c4762a1bSJed Brown data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel 128*c4762a1bSJed Brown layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically 129*c4762a1bSJed Brown at the end of last dimension. This padding is required by FFTW to perform parallel real D.F.T. */ 130*c4762a1bSJed Brown ierr = VecScatterPetscToFFTW(A,input,x);CHKERRQ(ierr);/* buggy for dim = 3, 4... */ 131*c4762a1bSJed Brown 132*c4762a1bSJed Brown /* Apply FFTW_FORWARD and FFTW_BACKWARD */ 133*c4762a1bSJed Brown ierr = MatMult(A,x,y);CHKERRQ(ierr); 134*c4762a1bSJed Brown if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} 135*c4762a1bSJed Brown ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr); 136*c4762a1bSJed Brown 137*c4762a1bSJed Brown /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc 138*c4762a1bSJed Brown performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of 139*c4762a1bSJed Brown the extra spaces that were artificially padded to perform real parallel transform. */ 140*c4762a1bSJed Brown ierr = VecScatterFFTWToPetsc(A,z,output);CHKERRQ(ierr); 141*c4762a1bSJed Brown 142*c4762a1bSJed Brown /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ 143*c4762a1bSJed Brown a = 1.0/(PetscReal)N; 144*c4762a1bSJed Brown ierr = VecScale(output,a);CHKERRQ(ierr); 145*c4762a1bSJed Brown if (view) {ierr = VecView(output,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} 146*c4762a1bSJed Brown ierr = VecAXPY(output,-1.0,input);CHKERRQ(ierr); 147*c4762a1bSJed Brown ierr = VecNorm(output,NORM_1,&enorm);CHKERRQ(ierr); 148*c4762a1bSJed Brown if (enorm > 1.e-09 && !rank) { 149*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %e\n",enorm);CHKERRQ(ierr); 150*c4762a1bSJed Brown } 151*c4762a1bSJed Brown 152*c4762a1bSJed Brown /* Free spaces */ 153*c4762a1bSJed Brown ierr = PetscFree(dim);CHKERRQ(ierr); 154*c4762a1bSJed Brown ierr = VecDestroy(&input);CHKERRQ(ierr); 155*c4762a1bSJed Brown ierr = VecDestroy(&output);CHKERRQ(ierr); 156*c4762a1bSJed Brown ierr = VecDestroy(&x);CHKERRQ(ierr); 157*c4762a1bSJed Brown ierr = VecDestroy(&y);CHKERRQ(ierr); 158*c4762a1bSJed Brown ierr = VecDestroy(&z);CHKERRQ(ierr); 159*c4762a1bSJed Brown ierr = MatDestroy(&A);CHKERRQ(ierr); 160*c4762a1bSJed Brown } 161*c4762a1bSJed Brown } 162*c4762a1bSJed Brown ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); 163*c4762a1bSJed Brown ierr = PetscFinalize(); 164*c4762a1bSJed Brown return ierr; 165*c4762a1bSJed Brown } 166*c4762a1bSJed Brown 167*c4762a1bSJed Brown 168*c4762a1bSJed Brown /*TEST 169*c4762a1bSJed Brown 170*c4762a1bSJed Brown build: 171*c4762a1bSJed Brown requires: fftw !complex 172*c4762a1bSJed Brown 173*c4762a1bSJed Brown test: 174*c4762a1bSJed Brown output_file: output/ex158.out 175*c4762a1bSJed Brown 176*c4762a1bSJed Brown test: 177*c4762a1bSJed Brown suffix: 2 178*c4762a1bSJed Brown nsize: 3 179*c4762a1bSJed Brown 180*c4762a1bSJed Brown TEST*/ 181