xref: /petsc/src/ksp/pc/tutorials/ex3.c (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1*c4762a1bSJed Brown 
2*c4762a1bSJed Brown static char help[] = "Test PC redistribute on matrix with load imbalance. \n\
3*c4762a1bSJed Brown                       Modified from src/ksp/ksp/tutorials/ex2.c.\n\
4*c4762a1bSJed Brown Input parameters include:\n\
5*c4762a1bSJed Brown   -random_exact_sol : use a random exact solution vector\n\
6*c4762a1bSJed Brown   -view_exact_sol   : write exact solution vector to stdout\n\
7*c4762a1bSJed Brown   -n <mesh_n>       : number of mesh points in y-direction\n\n";
8*c4762a1bSJed Brown /*
9*c4762a1bSJed Brown Example:
10*c4762a1bSJed Brown   mpiexec -n 8 ./ex3 -n 10000 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8 -log_view
11*c4762a1bSJed Brown   mpiexec -n 8 ./ex3 -n 10000 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8 -log_view
12*c4762a1bSJed Brown */
13*c4762a1bSJed Brown 
14*c4762a1bSJed Brown #include <petscksp.h>
15*c4762a1bSJed Brown 
16*c4762a1bSJed Brown int main(int argc,char **args)
17*c4762a1bSJed Brown {
18*c4762a1bSJed Brown   Vec            x,b,u;    /* approx solution, RHS, exact solution */
19*c4762a1bSJed Brown   Mat            A;        /* linear system matrix */
20*c4762a1bSJed Brown   KSP            ksp;      /* linear solver context */
21*c4762a1bSJed Brown   PetscRandom    rctx;     /* random number generator context */
22*c4762a1bSJed Brown   PetscReal      norm;     /* norm of solution error */
23*c4762a1bSJed Brown   PetscInt       i,j,Ii,J,Istart,Iend,m,n = 7,its,nloc,matdistribute=0;
24*c4762a1bSJed Brown   PetscErrorCode ierr;
25*c4762a1bSJed Brown   PetscBool      flg = PETSC_FALSE;
26*c4762a1bSJed Brown   PetscScalar    v;
27*c4762a1bSJed Brown   PetscMPIInt    rank,size;
28*c4762a1bSJed Brown #if defined(PETSC_USE_LOG)
29*c4762a1bSJed Brown   PetscLogStage stage;
30*c4762a1bSJed Brown #endif
31*c4762a1bSJed Brown 
32*c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
33*c4762a1bSJed Brown   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
34*c4762a1bSJed Brown   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
35*c4762a1bSJed Brown   if (size < 2) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This example requires at least 2 MPI processes!");
36*c4762a1bSJed Brown 
37*c4762a1bSJed Brown   ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
38*c4762a1bSJed Brown   ierr = PetscOptionsGetInt(NULL,NULL,"-matdistribute",&matdistribute,NULL);CHKERRQ(ierr);
39*c4762a1bSJed Brown   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
40*c4762a1bSJed Brown          Compute the matrix and right-hand-side vector that define
41*c4762a1bSJed Brown          the linear system, Ax = b.
42*c4762a1bSJed Brown      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
43*c4762a1bSJed Brown   switch(matdistribute) {
44*c4762a1bSJed Brown   case 1: /* very imbalanced process load for matrix A */
45*c4762a1bSJed Brown     m    = (1+size)*size;
46*c4762a1bSJed Brown     nloc = (rank+1)*n;
47*c4762a1bSJed Brown     if (rank == size-1) { /* proc[size-1] stores all remaining rows */
48*c4762a1bSJed Brown       nloc = m*n;
49*c4762a1bSJed Brown       for (i=0; i<size-1; i++){
50*c4762a1bSJed Brown         nloc -= (i+1)*n;
51*c4762a1bSJed Brown       }
52*c4762a1bSJed Brown     }
53*c4762a1bSJed Brown     break;
54*c4762a1bSJed Brown   default: /* proc[0] and proc[1] load much smaller row blocks, the rest processes have same loads */
55*c4762a1bSJed Brown     if (rank == 0 || rank == 1) {
56*c4762a1bSJed Brown       nloc = n;
57*c4762a1bSJed Brown     } else {
58*c4762a1bSJed Brown       nloc = 10*n; /* 10x larger load */
59*c4762a1bSJed Brown     }
60*c4762a1bSJed Brown     m = 2 + (size-2)*10;
61*c4762a1bSJed Brown     break;
62*c4762a1bSJed Brown   }
63*c4762a1bSJed Brown   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
64*c4762a1bSJed Brown   ierr = MatSetSizes(A,nloc,nloc,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
65*c4762a1bSJed Brown   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
66*c4762a1bSJed Brown   ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr);
67*c4762a1bSJed Brown   ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
68*c4762a1bSJed Brown   ierr = MatSetUp(A);CHKERRQ(ierr);
69*c4762a1bSJed Brown 
70*c4762a1bSJed Brown   ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
71*c4762a1bSJed Brown   nloc = Iend-Istart;
72*c4762a1bSJed Brown   ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] A Istart,Iend: %D %D; nloc %D\n",rank,Istart,Iend,nloc);CHKERRQ(ierr);
73*c4762a1bSJed Brown   ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr);
74*c4762a1bSJed Brown 
75*c4762a1bSJed Brown   ierr = PetscLogStageRegister("Assembly", &stage);CHKERRQ(ierr);
76*c4762a1bSJed Brown   ierr = PetscLogStagePush(stage);CHKERRQ(ierr);
77*c4762a1bSJed Brown   for (Ii=Istart; Ii<Iend; Ii++) {
78*c4762a1bSJed Brown     v = -1.0; i = Ii/n; j = Ii - i*n;
79*c4762a1bSJed Brown     if (i>0)   {J = Ii - n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
80*c4762a1bSJed Brown     if (i<m-1) {J = Ii + n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
81*c4762a1bSJed Brown     if (j>0)   {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
82*c4762a1bSJed Brown     if (j<n-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
83*c4762a1bSJed Brown     v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
84*c4762a1bSJed Brown   }
85*c4762a1bSJed Brown   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
86*c4762a1bSJed Brown   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
87*c4762a1bSJed Brown   ierr = PetscLogStagePop();CHKERRQ(ierr);
88*c4762a1bSJed Brown 
89*c4762a1bSJed Brown   /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
90*c4762a1bSJed Brown   ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
91*c4762a1bSJed Brown 
92*c4762a1bSJed Brown   /* Create parallel vectors. */
93*c4762a1bSJed Brown   ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr);
94*c4762a1bSJed Brown   ierr = VecSetSizes(u,nloc,PETSC_DECIDE);CHKERRQ(ierr);
95*c4762a1bSJed Brown   ierr = VecSetFromOptions(u);CHKERRQ(ierr);
96*c4762a1bSJed Brown   ierr = VecDuplicate(u,&b);CHKERRQ(ierr);
97*c4762a1bSJed Brown   ierr = VecDuplicate(b,&x);CHKERRQ(ierr);
98*c4762a1bSJed Brown 
99*c4762a1bSJed Brown   /* Set exact solution; then compute right-hand-side vector. */
100*c4762a1bSJed Brown   ierr = PetscOptionsGetBool(NULL,NULL,"-random_exact_sol",&flg,NULL);CHKERRQ(ierr);
101*c4762a1bSJed Brown   if (flg) {
102*c4762a1bSJed Brown     ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
103*c4762a1bSJed Brown     ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
104*c4762a1bSJed Brown     ierr = VecSetRandom(u,rctx);CHKERRQ(ierr);
105*c4762a1bSJed Brown     ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
106*c4762a1bSJed Brown   } else {
107*c4762a1bSJed Brown     ierr = VecSet(u,1.0);CHKERRQ(ierr);
108*c4762a1bSJed Brown   }
109*c4762a1bSJed Brown   ierr = MatMult(A,u,b);CHKERRQ(ierr);
110*c4762a1bSJed Brown 
111*c4762a1bSJed Brown   /* View the exact solution vector if desired */
112*c4762a1bSJed Brown   flg  = PETSC_FALSE;
113*c4762a1bSJed Brown   ierr = PetscOptionsGetBool(NULL,NULL,"-view_exact_sol",&flg,NULL);CHKERRQ(ierr);
114*c4762a1bSJed Brown   if (flg) {ierr = VecView(u,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
115*c4762a1bSJed Brown 
116*c4762a1bSJed Brown   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
117*c4762a1bSJed Brown                 Create the linear solver and set various options
118*c4762a1bSJed Brown      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
119*c4762a1bSJed Brown   ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
120*c4762a1bSJed Brown   ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
121*c4762a1bSJed Brown   ierr = KSPSetTolerances(ksp,1.e-2/((m+1)*(n+1)),1.e-50,PETSC_DEFAULT,
122*c4762a1bSJed Brown                           PETSC_DEFAULT);CHKERRQ(ierr);
123*c4762a1bSJed Brown   ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
124*c4762a1bSJed Brown 
125*c4762a1bSJed Brown   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
126*c4762a1bSJed Brown                       Solve the linear system
127*c4762a1bSJed Brown      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
128*c4762a1bSJed Brown   ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
129*c4762a1bSJed Brown 
130*c4762a1bSJed Brown   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
131*c4762a1bSJed Brown                       Check solution and clean up
132*c4762a1bSJed Brown      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
133*c4762a1bSJed Brown   ierr = VecAXPY(x,-1.0,u);CHKERRQ(ierr);
134*c4762a1bSJed Brown   ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr);
135*c4762a1bSJed Brown   ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr);
136*c4762a1bSJed Brown   ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);CHKERRQ(ierr);
137*c4762a1bSJed Brown 
138*c4762a1bSJed Brown   /* Free work space. */
139*c4762a1bSJed Brown   ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
140*c4762a1bSJed Brown   ierr = VecDestroy(&u);CHKERRQ(ierr);  ierr = VecDestroy(&x);CHKERRQ(ierr);
141*c4762a1bSJed Brown   ierr = VecDestroy(&b);CHKERRQ(ierr);  ierr = MatDestroy(&A);CHKERRQ(ierr);
142*c4762a1bSJed Brown   ierr = PetscFinalize();
143*c4762a1bSJed Brown   return ierr;
144*c4762a1bSJed Brown }
145*c4762a1bSJed Brown 
146*c4762a1bSJed Brown 
147*c4762a1bSJed Brown /*TEST
148*c4762a1bSJed Brown 
149*c4762a1bSJed Brown    test:
150*c4762a1bSJed Brown       nsize: 8
151*c4762a1bSJed Brown       args: -n 100 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8
152*c4762a1bSJed Brown 
153*c4762a1bSJed Brown    test:
154*c4762a1bSJed Brown       suffix: 2
155*c4762a1bSJed Brown       nsize: 8
156*c4762a1bSJed Brown       args: -n 100 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8
157*c4762a1bSJed Brown 
158*c4762a1bSJed Brown TEST*/
159