1c4762a1bSJed Brown 2c4762a1bSJed Brown static char help[] = "Test PC redistribute on matrix with load imbalance. \n\ 3c4762a1bSJed Brown Modified from src/ksp/ksp/tutorials/ex2.c.\n\ 4c4762a1bSJed Brown Input parameters include:\n\ 5c4762a1bSJed Brown -random_exact_sol : use a random exact solution vector\n\ 6c4762a1bSJed Brown -view_exact_sol : write exact solution vector to stdout\n\ 710582eeaSJed Brown -n <mesh_y> : number of mesh points\n\n"; 8c4762a1bSJed Brown /* 9c4762a1bSJed Brown Example: 10c4762a1bSJed Brown mpiexec -n 8 ./ex3 -n 10000 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8 -log_view 11c4762a1bSJed Brown mpiexec -n 8 ./ex3 -n 10000 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8 -log_view 12c4762a1bSJed Brown */ 13c4762a1bSJed Brown 14c4762a1bSJed Brown #include <petscksp.h> 15c4762a1bSJed Brown 16*9371c9d4SSatish Balay int main(int argc, char **args) { 17c4762a1bSJed Brown Vec x, b, u; /* approx solution, RHS, exact solution */ 18c4762a1bSJed Brown Mat A; /* linear system matrix */ 19c4762a1bSJed Brown KSP ksp; /* linear solver context */ 20c4762a1bSJed Brown PetscRandom rctx; /* random number generator context */ 21c4762a1bSJed Brown PetscReal norm; /* norm of solution error */ 22c4762a1bSJed Brown PetscInt i, j, Ii, J, Istart, Iend, m, n = 7, its, nloc, matdistribute = 0; 23c4762a1bSJed Brown PetscBool flg = PETSC_FALSE; 24c4762a1bSJed Brown PetscScalar v; 25c4762a1bSJed Brown PetscMPIInt rank, size; 26c4762a1bSJed Brown #if defined(PETSC_USE_LOG) 27c4762a1bSJed Brown PetscLogStage stage; 28c4762a1bSJed Brown #endif 29c4762a1bSJed Brown 30327415f7SBarry Smith PetscFunctionBeginUser; 319566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 329566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 339566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 347827d75bSBarry Smith PetscCheck(size > 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This example requires at least 2 MPI processes!"); 35c4762a1bSJed Brown 369566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); 379566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-matdistribute", &matdistribute, NULL)); 38c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 39c4762a1bSJed Brown Compute the matrix and right-hand-side vector that define 40c4762a1bSJed Brown the linear system, Ax = b. 41c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 42c4762a1bSJed Brown switch (matdistribute) { 43c4762a1bSJed Brown case 1: /* very imbalanced process load for matrix A */ 44c4762a1bSJed Brown m = (1 + size) * size; 45c4762a1bSJed Brown nloc = (rank + 1) * n; 46c4762a1bSJed Brown if (rank == size - 1) { /* proc[size-1] stores all remaining rows */ 47c4762a1bSJed Brown nloc = m * n; 48*9371c9d4SSatish Balay for (i = 0; i < size - 1; i++) { nloc -= (i + 1) * n; } 49c4762a1bSJed Brown } 50c4762a1bSJed Brown break; 51c4762a1bSJed Brown default: /* proc[0] and proc[1] load much smaller row blocks, the rest processes have same loads */ 52c4762a1bSJed Brown if (rank == 0 || rank == 1) { 53c4762a1bSJed Brown nloc = n; 54c4762a1bSJed Brown } else { 55c4762a1bSJed Brown nloc = 10 * n; /* 10x larger load */ 56c4762a1bSJed Brown } 57c4762a1bSJed Brown m = 2 + (size - 2) * 10; 58c4762a1bSJed Brown break; 59c4762a1bSJed Brown } 609566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 619566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, nloc, nloc, PETSC_DECIDE, PETSC_DECIDE)); 629566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 639566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL)); 649566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL)); 659566063dSJacob Faibussowitsch PetscCall(MatSetUp(A)); 66c4762a1bSJed Brown 679566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 68c4762a1bSJed Brown nloc = Iend - Istart; 6963a3b9bcSJacob Faibussowitsch PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "[%d] A Istart,Iend: %" PetscInt_FMT " %" PetscInt_FMT "; nloc %" PetscInt_FMT "\n", rank, Istart, Iend, nloc)); 709566063dSJacob Faibussowitsch PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT)); 71c4762a1bSJed Brown 729566063dSJacob Faibussowitsch PetscCall(PetscLogStageRegister("Assembly", &stage)); 739566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(stage)); 74c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 75*9371c9d4SSatish Balay v = -1.0; 76*9371c9d4SSatish Balay i = Ii / n; 77*9371c9d4SSatish Balay j = Ii - i * n; 78*9371c9d4SSatish Balay if (i > 0) { 79*9371c9d4SSatish Balay J = Ii - n; 80*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 81*9371c9d4SSatish Balay } 82*9371c9d4SSatish Balay if (i < m - 1) { 83*9371c9d4SSatish Balay J = Ii + n; 84*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 85*9371c9d4SSatish Balay } 86*9371c9d4SSatish Balay if (j > 0) { 87*9371c9d4SSatish Balay J = Ii - 1; 88*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 89*9371c9d4SSatish Balay } 90*9371c9d4SSatish Balay if (j < n - 1) { 91*9371c9d4SSatish Balay J = Ii + 1; 92*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 93*9371c9d4SSatish Balay } 94*9371c9d4SSatish Balay v = 4.0; 95*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 96c4762a1bSJed Brown } 979566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 989566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 999566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 100c4762a1bSJed Brown 101c4762a1bSJed Brown /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */ 1029566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE)); 103c4762a1bSJed Brown 104c4762a1bSJed Brown /* Create parallel vectors. */ 1059566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_WORLD, &u)); 1069566063dSJacob Faibussowitsch PetscCall(VecSetSizes(u, nloc, PETSC_DECIDE)); 1079566063dSJacob Faibussowitsch PetscCall(VecSetFromOptions(u)); 1089566063dSJacob Faibussowitsch PetscCall(VecDuplicate(u, &b)); 1099566063dSJacob Faibussowitsch PetscCall(VecDuplicate(b, &x)); 110c4762a1bSJed Brown 111c4762a1bSJed Brown /* Set exact solution; then compute right-hand-side vector. */ 1129566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-random_exact_sol", &flg, NULL)); 113c4762a1bSJed Brown if (flg) { 1149566063dSJacob Faibussowitsch PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx)); 1159566063dSJacob Faibussowitsch PetscCall(PetscRandomSetFromOptions(rctx)); 1169566063dSJacob Faibussowitsch PetscCall(VecSetRandom(u, rctx)); 1179566063dSJacob Faibussowitsch PetscCall(PetscRandomDestroy(&rctx)); 118c4762a1bSJed Brown } else { 1199566063dSJacob Faibussowitsch PetscCall(VecSet(u, 1.0)); 120c4762a1bSJed Brown } 1219566063dSJacob Faibussowitsch PetscCall(MatMult(A, u, b)); 122c4762a1bSJed Brown 123c4762a1bSJed Brown /* View the exact solution vector if desired */ 124c4762a1bSJed Brown flg = PETSC_FALSE; 1259566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL)); 1269566063dSJacob Faibussowitsch if (flg) PetscCall(VecView(u, PETSC_VIEWER_STDOUT_WORLD)); 127c4762a1bSJed Brown 128c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 129c4762a1bSJed Brown Create the linear solver and set various options 130c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 1319566063dSJacob Faibussowitsch PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp)); 1329566063dSJacob Faibussowitsch PetscCall(KSPSetOperators(ksp, A, A)); 133d0609cedSBarry Smith PetscCall(KSPSetTolerances(ksp, 1.e-2 / ((m + 1) * (n + 1)), PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT)); 1349566063dSJacob Faibussowitsch PetscCall(KSPSetFromOptions(ksp)); 135c4762a1bSJed Brown 136c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 137c4762a1bSJed Brown Solve the linear system 138c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 1399566063dSJacob Faibussowitsch PetscCall(KSPSolve(ksp, b, x)); 140c4762a1bSJed Brown 141c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 142c4762a1bSJed Brown Check solution and clean up 143c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 1449566063dSJacob Faibussowitsch PetscCall(VecAXPY(x, -1.0, u)); 1459566063dSJacob Faibussowitsch PetscCall(VecNorm(x, NORM_2, &norm)); 1469566063dSJacob Faibussowitsch PetscCall(KSPGetIterationNumber(ksp, &its)); 14763a3b9bcSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its)); 148c4762a1bSJed Brown 149c4762a1bSJed Brown /* Free work space. */ 1509566063dSJacob Faibussowitsch PetscCall(KSPDestroy(&ksp)); 151*9371c9d4SSatish Balay PetscCall(VecDestroy(&u)); 152*9371c9d4SSatish Balay PetscCall(VecDestroy(&x)); 153*9371c9d4SSatish Balay PetscCall(VecDestroy(&b)); 154*9371c9d4SSatish Balay PetscCall(MatDestroy(&A)); 1559566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 156b122ec5aSJacob Faibussowitsch return 0; 157c4762a1bSJed Brown } 158c4762a1bSJed Brown 159c4762a1bSJed Brown /*TEST 160c4762a1bSJed Brown 161c4762a1bSJed Brown test: 162c4762a1bSJed Brown nsize: 8 163c4762a1bSJed Brown args: -n 100 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8 164c4762a1bSJed Brown 165c4762a1bSJed Brown test: 166c4762a1bSJed Brown suffix: 2 167c4762a1bSJed Brown nsize: 8 168c4762a1bSJed Brown args: -n 100 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8 169c4762a1bSJed Brown 170c4762a1bSJed Brown TEST*/ 171