xref: /petsc/src/mat/tests/ex158.c (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1*c4762a1bSJed Brown static char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n";
2*c4762a1bSJed Brown 
3*c4762a1bSJed Brown /*
4*c4762a1bSJed Brown  Usage:
5*c4762a1bSJed Brown    mpiexec -n <np> ./ex158 -use_FFTW_interface NO
6*c4762a1bSJed Brown    mpiexec -n <np> ./ex158 -use_FFTW_interface YES
7*c4762a1bSJed Brown */
8*c4762a1bSJed Brown 
9*c4762a1bSJed Brown #include <petscmat.h>
10*c4762a1bSJed Brown #include <fftw3-mpi.h>
11*c4762a1bSJed Brown 
12*c4762a1bSJed Brown int main(int argc,char **args)
13*c4762a1bSJed Brown {
14*c4762a1bSJed Brown   PetscErrorCode ierr;
15*c4762a1bSJed Brown   PetscMPIInt    rank,size;
16*c4762a1bSJed Brown   PetscInt       N0=50,N1=20,N=N0*N1;
17*c4762a1bSJed Brown   PetscRandom    rdm;
18*c4762a1bSJed Brown   PetscScalar    a;
19*c4762a1bSJed Brown   PetscReal      enorm;
20*c4762a1bSJed Brown   Vec            x,y,z;
21*c4762a1bSJed Brown   PetscBool      view=PETSC_FALSE,use_interface=PETSC_TRUE;
22*c4762a1bSJed Brown 
23*c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
24*c4762a1bSJed Brown #if defined(PETSC_USE_COMPLEX)
25*c4762a1bSJed Brown   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex");
26*c4762a1bSJed Brown #endif
27*c4762a1bSJed Brown 
28*c4762a1bSJed Brown   ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex158");CHKERRQ(ierr);
29*c4762a1bSJed Brown   ierr = PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex158",use_interface, &use_interface, NULL);CHKERRQ(ierr);
30*c4762a1bSJed Brown   ierr = PetscOptionsEnd();CHKERRQ(ierr);
31*c4762a1bSJed Brown 
32*c4762a1bSJed Brown   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
33*c4762a1bSJed Brown   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
34*c4762a1bSJed Brown 
35*c4762a1bSJed Brown   ierr = PetscRandomCreate(PETSC_COMM_WORLD, &rdm);CHKERRQ(ierr);
36*c4762a1bSJed Brown   ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr);
37*c4762a1bSJed Brown 
38*c4762a1bSJed Brown   if (!use_interface) {
39*c4762a1bSJed Brown     /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */
40*c4762a1bSJed Brown     /*---------------------------------------------------------*/
41*c4762a1bSJed Brown     fftw_plan    fplan,bplan;
42*c4762a1bSJed Brown     fftw_complex *data_in,*data_out,*data_out2;
43*c4762a1bSJed Brown     ptrdiff_t    alloc_local,local_n0,local_0_start;
44*c4762a1bSJed Brown 
45*c4762a1bSJed Brown     if (!rank) printf("Use FFTW without PETSc-FFTW interface\n");
46*c4762a1bSJed Brown     fftw_mpi_init();
47*c4762a1bSJed Brown     N           = N0*N1;
48*c4762a1bSJed Brown     alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_n0,&local_0_start);
49*c4762a1bSJed Brown 
50*c4762a1bSJed Brown     data_in   = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
51*c4762a1bSJed Brown     data_out  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
52*c4762a1bSJed Brown     data_out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
53*c4762a1bSJed Brown 
54*c4762a1bSJed Brown     ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_in,&x);CHKERRQ(ierr);
55*c4762a1bSJed Brown     ierr = PetscObjectSetName((PetscObject) x, "Real Space vector");CHKERRQ(ierr);
56*c4762a1bSJed Brown     ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out,&y);CHKERRQ(ierr);
57*c4762a1bSJed Brown     ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr);
58*c4762a1bSJed Brown     ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out2,&z);CHKERRQ(ierr);
59*c4762a1bSJed Brown     ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr);
60*c4762a1bSJed Brown 
61*c4762a1bSJed Brown     fplan = fftw_mpi_plan_dft_2d(N0,N1,data_in,data_out,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE);
62*c4762a1bSJed Brown     bplan = fftw_mpi_plan_dft_2d(N0,N1,data_out,data_out2,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE);
63*c4762a1bSJed Brown 
64*c4762a1bSJed Brown     ierr = VecSetRandom(x, rdm);CHKERRQ(ierr);
65*c4762a1bSJed Brown     if (view) {ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
66*c4762a1bSJed Brown 
67*c4762a1bSJed Brown     fftw_execute(fplan);
68*c4762a1bSJed Brown     if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
69*c4762a1bSJed Brown 
70*c4762a1bSJed Brown     fftw_execute(bplan);
71*c4762a1bSJed Brown 
72*c4762a1bSJed Brown     /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
73*c4762a1bSJed Brown     a    = 1.0/(PetscReal)N;
74*c4762a1bSJed Brown     ierr = VecScale(z,a);CHKERRQ(ierr);
75*c4762a1bSJed Brown     if (view) {ierr = VecView(z, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
76*c4762a1bSJed Brown     ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr);
77*c4762a1bSJed Brown     ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr);
78*c4762a1bSJed Brown     if (enorm > 1.e-11) {
79*c4762a1bSJed Brown       ierr = PetscPrintf(PETSC_COMM_SELF,"  Error norm of |x - z| %g\n",(double)enorm);CHKERRQ(ierr);
80*c4762a1bSJed Brown     }
81*c4762a1bSJed Brown 
82*c4762a1bSJed Brown     /* Free spaces */
83*c4762a1bSJed Brown     fftw_destroy_plan(fplan);
84*c4762a1bSJed Brown     fftw_destroy_plan(bplan);
85*c4762a1bSJed Brown     fftw_free(data_in);  ierr = VecDestroy(&x);CHKERRQ(ierr);
86*c4762a1bSJed Brown     fftw_free(data_out); ierr = VecDestroy(&y);CHKERRQ(ierr);
87*c4762a1bSJed Brown     fftw_free(data_out2);ierr = VecDestroy(&z);CHKERRQ(ierr);
88*c4762a1bSJed Brown 
89*c4762a1bSJed Brown   } else {
90*c4762a1bSJed Brown     /* Use PETSc-FFTW interface                  */
91*c4762a1bSJed Brown     /*-------------------------------------------*/
92*c4762a1bSJed Brown     PetscInt i,*dim,k,DIM;
93*c4762a1bSJed Brown     Mat      A;
94*c4762a1bSJed Brown     Vec      input,output;
95*c4762a1bSJed Brown 
96*c4762a1bSJed Brown     N=30;
97*c4762a1bSJed Brown     for (i=2; i<3; i++) { /* (i=3,4: -- error in VecScatterPetscToFFTW(A,input,x); */
98*c4762a1bSJed Brown       DIM  = i;
99*c4762a1bSJed Brown       ierr = PetscMalloc1(i,&dim);CHKERRQ(ierr);
100*c4762a1bSJed Brown       for (k=0; k<i; k++) {
101*c4762a1bSJed Brown         dim[k]=30;
102*c4762a1bSJed Brown       }
103*c4762a1bSJed Brown       N *= dim[i-1];
104*c4762a1bSJed Brown 
105*c4762a1bSJed Brown       /* Create FFTW object */
106*c4762a1bSJed Brown       if (!rank) {
107*c4762a1bSJed Brown         ierr = PetscPrintf(PETSC_COMM_SELF,"Use PETSc-FFTW interface...%d-DIM:%d \n",DIM,N);CHKERRQ(ierr);
108*c4762a1bSJed Brown       }
109*c4762a1bSJed Brown       ierr = MatCreateFFT(PETSC_COMM_WORLD,DIM,dim,MATFFTW,&A);CHKERRQ(ierr);
110*c4762a1bSJed Brown 
111*c4762a1bSJed Brown       /* Create FFTW vectors that are compatible with parallel layout of A */
112*c4762a1bSJed Brown       ierr = MatCreateVecsFFTW(A,&x,&y,&z);CHKERRQ(ierr);
113*c4762a1bSJed Brown       ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr);
114*c4762a1bSJed Brown       ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr);
115*c4762a1bSJed Brown       ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr);
116*c4762a1bSJed Brown 
117*c4762a1bSJed Brown       /* Create and set PETSc vector */
118*c4762a1bSJed Brown       ierr = VecCreate(PETSC_COMM_WORLD,&input);CHKERRQ(ierr);
119*c4762a1bSJed Brown       ierr = VecSetSizes(input,PETSC_DECIDE,N);CHKERRQ(ierr);
120*c4762a1bSJed Brown       ierr = VecSetFromOptions(input);CHKERRQ(ierr);
121*c4762a1bSJed Brown       ierr = VecSetRandom(input,rdm);CHKERRQ(ierr);
122*c4762a1bSJed Brown       ierr = VecDuplicate(input,&output);CHKERRQ(ierr);
123*c4762a1bSJed Brown       if (view) {ierr = VecView(input,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
124*c4762a1bSJed Brown 
125*c4762a1bSJed Brown       /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data
126*c4762a1bSJed Brown          can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original
127*c4762a1bSJed Brown          data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel
128*c4762a1bSJed Brown          layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically
129*c4762a1bSJed Brown          at the end of last  dimension. This padding is required by FFTW to perform parallel real D.F.T.  */
130*c4762a1bSJed Brown       ierr = VecScatterPetscToFFTW(A,input,x);CHKERRQ(ierr);/* buggy for dim = 3, 4... */
131*c4762a1bSJed Brown 
132*c4762a1bSJed Brown       /* Apply FFTW_FORWARD and FFTW_BACKWARD */
133*c4762a1bSJed Brown       ierr = MatMult(A,x,y);CHKERRQ(ierr);
134*c4762a1bSJed Brown       if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
135*c4762a1bSJed Brown       ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr);
136*c4762a1bSJed Brown 
137*c4762a1bSJed Brown       /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc
138*c4762a1bSJed Brown          performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of
139*c4762a1bSJed Brown          the extra spaces that were artificially padded to perform real parallel transform.    */
140*c4762a1bSJed Brown       ierr = VecScatterFFTWToPetsc(A,z,output);CHKERRQ(ierr);
141*c4762a1bSJed Brown 
142*c4762a1bSJed Brown       /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
143*c4762a1bSJed Brown       a    = 1.0/(PetscReal)N;
144*c4762a1bSJed Brown       ierr = VecScale(output,a);CHKERRQ(ierr);
145*c4762a1bSJed Brown       if (view) {ierr = VecView(output,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
146*c4762a1bSJed Brown       ierr = VecAXPY(output,-1.0,input);CHKERRQ(ierr);
147*c4762a1bSJed Brown       ierr = VecNorm(output,NORM_1,&enorm);CHKERRQ(ierr);
148*c4762a1bSJed Brown       if (enorm > 1.e-09 && !rank) {
149*c4762a1bSJed Brown         ierr = PetscPrintf(PETSC_COMM_SELF,"  Error norm of |x - z| %e\n",enorm);CHKERRQ(ierr);
150*c4762a1bSJed Brown       }
151*c4762a1bSJed Brown 
152*c4762a1bSJed Brown       /* Free spaces */
153*c4762a1bSJed Brown       ierr = PetscFree(dim);CHKERRQ(ierr);
154*c4762a1bSJed Brown       ierr = VecDestroy(&input);CHKERRQ(ierr);
155*c4762a1bSJed Brown       ierr = VecDestroy(&output);CHKERRQ(ierr);
156*c4762a1bSJed Brown       ierr = VecDestroy(&x);CHKERRQ(ierr);
157*c4762a1bSJed Brown       ierr = VecDestroy(&y);CHKERRQ(ierr);
158*c4762a1bSJed Brown       ierr = VecDestroy(&z);CHKERRQ(ierr);
159*c4762a1bSJed Brown       ierr = MatDestroy(&A);CHKERRQ(ierr);
160*c4762a1bSJed Brown     }
161*c4762a1bSJed Brown   }
162*c4762a1bSJed Brown   ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr);
163*c4762a1bSJed Brown   ierr = PetscFinalize();
164*c4762a1bSJed Brown   return ierr;
165*c4762a1bSJed Brown }
166*c4762a1bSJed Brown 
167*c4762a1bSJed Brown 
168*c4762a1bSJed Brown /*TEST
169*c4762a1bSJed Brown 
170*c4762a1bSJed Brown    build:
171*c4762a1bSJed Brown       requires: fftw !complex
172*c4762a1bSJed Brown 
173*c4762a1bSJed Brown    test:
174*c4762a1bSJed Brown       output_file: output/ex158.out
175*c4762a1bSJed Brown 
176*c4762a1bSJed Brown    test:
177*c4762a1bSJed Brown       suffix: 2
178*c4762a1bSJed Brown       nsize: 3
179*c4762a1bSJed Brown 
180*c4762a1bSJed Brown TEST*/
181