1c4762a1bSJed Brown static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\ 2c4762a1bSJed Brown Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n"; 3c4762a1bSJed Brown 4c4762a1bSJed Brown #include <petscmat.h> 5c4762a1bSJed Brown 6*9371c9d4SSatish Balay int main(int argc, char **args) { 7c4762a1bSJed Brown Mat A; 8c4762a1bSJed Brown Vec x, rhs, y; 9c4762a1bSJed Brown PetscInt i, j, k, b, m = 3, n, nlocal = 2, bs = 1, Ii, J; 10c4762a1bSJed Brown PetscInt *boundary_nodes, nboundary_nodes, *boundary_indices; 11c4762a1bSJed Brown PetscMPIInt rank, size; 12c4762a1bSJed Brown PetscScalar v, v0, v1, v2, a0 = 0.1, a, rhsval, *boundary_values, diag = 1.0; 13c4762a1bSJed Brown PetscReal norm; 14c4762a1bSJed Brown char convname[64]; 15c4762a1bSJed Brown PetscBool upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE; 16c4762a1bSJed Brown 17327415f7SBarry Smith PetscFunctionBeginUser; 189566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 199566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 209566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 21c4762a1bSJed Brown n = nlocal * size; 22c4762a1bSJed Brown 239566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-bs", &bs, NULL)); 249566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-nonlocal_bc", &nonlocalBC, NULL)); 259566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetScalar(NULL, NULL, "-diag", &diag, NULL)); 269566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, NULL, "-convname", convname, sizeof(convname), &convert)); 279566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-zerorhs", &zerorhs, NULL)); 28c4762a1bSJed Brown 299566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 309566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n * bs, m * n * bs)); 319566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 329566063dSJacob Faibussowitsch PetscCall(MatSetUp(A)); 33c4762a1bSJed Brown 349566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &rhs)); 359566063dSJacob Faibussowitsch PetscCall(VecSetFromOptions(rhs)); 369566063dSJacob Faibussowitsch PetscCall(VecSetUp(rhs)); 37c4762a1bSJed Brown 38c4762a1bSJed Brown rhsval = 0.0; 39c4762a1bSJed Brown for (i = 0; i < m; i++) { 40c4762a1bSJed Brown for (j = nlocal * rank; j < nlocal * (rank + 1); j++) { 41c4762a1bSJed Brown a = a0; 42c4762a1bSJed Brown for (b = 0; b < bs; b++) { 43c4762a1bSJed Brown /* let's start with a 5-point stencil diffusion term */ 44*9371c9d4SSatish Balay v = -1.0; 45*9371c9d4SSatish Balay Ii = (j + n * i) * bs + b; 46*9371c9d4SSatish Balay if (i > 0) { 47*9371c9d4SSatish Balay J = Ii - n * bs; 48*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES)); 49*9371c9d4SSatish Balay } 50*9371c9d4SSatish Balay if (i < m - 1) { 51*9371c9d4SSatish Balay J = Ii + n * bs; 52*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES)); 53*9371c9d4SSatish Balay } 54*9371c9d4SSatish Balay if (j > 0) { 55*9371c9d4SSatish Balay J = Ii - 1 * bs; 56*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES)); 57*9371c9d4SSatish Balay } 58*9371c9d4SSatish Balay if (j < n - 1) { 59*9371c9d4SSatish Balay J = Ii + 1 * bs; 60*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES)); 61*9371c9d4SSatish Balay } 62*9371c9d4SSatish Balay v = 4.0; 63*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES)); 64c4762a1bSJed Brown if (upwind) { 65c4762a1bSJed Brown /* now add a 2nd order upwind advection term to add a little asymmetry */ 66c4762a1bSJed Brown if (j > 2) { 67*9371c9d4SSatish Balay J = Ii - 2 * bs; 68*9371c9d4SSatish Balay v2 = 0.5 * a; 69*9371c9d4SSatish Balay v1 = -2.0 * a; 70*9371c9d4SSatish Balay v0 = 1.5 * a; 719566063dSJacob Faibussowitsch PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v2, ADD_VALUES)); 72c4762a1bSJed Brown } else { 73c4762a1bSJed Brown /* fall back to 1st order upwind */ 74*9371c9d4SSatish Balay v1 = -1.0 * a; 75*9371c9d4SSatish Balay v0 = 1.0 * a; 76c4762a1bSJed Brown }; 77*9371c9d4SSatish Balay if (j > 1) { 78*9371c9d4SSatish Balay J = Ii - 1 * bs; 79*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v1, ADD_VALUES)); 80*9371c9d4SSatish Balay } 819566063dSJacob Faibussowitsch PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v0, ADD_VALUES)); 82c4762a1bSJed Brown a /= 10.; /* use a different velocity for the next component */ 83c4762a1bSJed Brown /* add a coupling to the previous and next components */ 84c4762a1bSJed Brown v = 0.5; 85*9371c9d4SSatish Balay if (b > 0) { 86*9371c9d4SSatish Balay J = Ii - 1; 87*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES)); 88*9371c9d4SSatish Balay } 89*9371c9d4SSatish Balay if (b < bs - 1) { 90*9371c9d4SSatish Balay J = Ii + 1; 91*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES)); 92*9371c9d4SSatish Balay } 93c4762a1bSJed Brown } 94c4762a1bSJed Brown /* make up some rhs */ 959566063dSJacob Faibussowitsch PetscCall(VecSetValue(rhs, Ii, rhsval, INSERT_VALUES)); 96c4762a1bSJed Brown rhsval += 1.0; 97c4762a1bSJed Brown } 98c4762a1bSJed Brown } 99c4762a1bSJed Brown } 1009566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1019566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 102c4762a1bSJed Brown 103c4762a1bSJed Brown if (convert) { /* Test different Mat implementations */ 104c4762a1bSJed Brown Mat B; 105c4762a1bSJed Brown 1069566063dSJacob Faibussowitsch PetscCall(MatConvert(A, convname, MAT_INITIAL_MATRIX, &B)); 1079566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 108c4762a1bSJed Brown A = B; 109c4762a1bSJed Brown } 110c4762a1bSJed Brown 1119566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(rhs)); 1129566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(rhs)); 113c4762a1bSJed Brown /* set rhs to zero to simplify */ 1141baa6e33SBarry Smith if (zerorhs) PetscCall(VecZeroEntries(rhs)); 115c4762a1bSJed Brown 116c4762a1bSJed Brown if (nonlocalBC) { 117c4762a1bSJed Brown /*version where boundary conditions are set by processes that don't necessarily own the nodes */ 118dd400576SPatrick Sanan if (rank == 0) { 119c4762a1bSJed Brown nboundary_nodes = size > m ? nlocal : m - size + nlocal; 1209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nboundary_nodes, &boundary_nodes)); 121c4762a1bSJed Brown k = 0; 122c4762a1bSJed Brown for (i = size; i < m; i++, k++) { boundary_nodes[k] = n * i; }; 123c4762a1bSJed Brown } else if (rank < m) { 124c4762a1bSJed Brown nboundary_nodes = nlocal + 1; 1259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nboundary_nodes, &boundary_nodes)); 126c4762a1bSJed Brown boundary_nodes[0] = rank * n; 127c4762a1bSJed Brown k = 1; 128c4762a1bSJed Brown } else { 129c4762a1bSJed Brown nboundary_nodes = nlocal; 1309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nboundary_nodes, &boundary_nodes)); 131c4762a1bSJed Brown k = 0; 132c4762a1bSJed Brown } 133c4762a1bSJed Brown for (j = nlocal * rank; j < nlocal * (rank + 1); j++, k++) { boundary_nodes[k] = j; }; 134c4762a1bSJed Brown } else { 135c4762a1bSJed Brown /*version where boundary conditions are set by the node owners only */ 1369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m * n, &boundary_nodes)); 137c4762a1bSJed Brown k = 0; 138c4762a1bSJed Brown for (j = 0; j < n; j++) { 139c4762a1bSJed Brown Ii = j; 140c4762a1bSJed Brown if (Ii >= rank * m * nlocal && Ii < (rank + 1) * m * nlocal) boundary_nodes[k++] = Ii; 141c4762a1bSJed Brown } 142c4762a1bSJed Brown for (i = 1; i < m; i++) { 143c4762a1bSJed Brown Ii = n * i; 144c4762a1bSJed Brown if (Ii >= rank * m * nlocal && Ii < (rank + 1) * m * nlocal) boundary_nodes[k++] = Ii; 145c4762a1bSJed Brown } 146c4762a1bSJed Brown nboundary_nodes = k; 147c4762a1bSJed Brown } 148c4762a1bSJed Brown 1499566063dSJacob Faibussowitsch PetscCall(VecDuplicate(rhs, &x)); 1509566063dSJacob Faibussowitsch PetscCall(VecZeroEntries(x)); 1519566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nboundary_nodes * bs, &boundary_indices, nboundary_nodes * bs, &boundary_values)); 152c4762a1bSJed Brown for (k = 0; k < nboundary_nodes; k++) { 153c4762a1bSJed Brown Ii = boundary_nodes[k] * bs; 154c4762a1bSJed Brown v = 1.0 * boundary_nodes[k]; 155c4762a1bSJed Brown for (b = 0; b < bs; b++, Ii++) { 156c4762a1bSJed Brown boundary_indices[k * bs + b] = Ii; 157c4762a1bSJed Brown boundary_values[k * bs + b] = v; 1589566063dSJacob Faibussowitsch PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %" PetscInt_FMT " %f\n", rank, Ii, (double)PetscRealPart(v))); 159c4762a1bSJed Brown v += 0.1; 160c4762a1bSJed Brown } 161c4762a1bSJed Brown } 1629566063dSJacob Faibussowitsch PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL)); 1639566063dSJacob Faibussowitsch PetscCall(VecSetValues(x, nboundary_nodes * bs, boundary_indices, boundary_values, INSERT_VALUES)); 1649566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x)); 1659566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x)); 166c4762a1bSJed Brown 167c4762a1bSJed Brown /* We can check the rhs returned by MatZeroColumns by computing y=rhs-A*x and overwriting the boundary entries with boundary values */ 1689566063dSJacob Faibussowitsch PetscCall(VecDuplicate(x, &y)); 1699566063dSJacob Faibussowitsch PetscCall(MatMult(A, x, y)); 1709566063dSJacob Faibussowitsch PetscCall(VecAYPX(y, -1.0, rhs)); 171c4762a1bSJed Brown for (k = 0; k < nboundary_nodes * bs; k++) boundary_values[k] *= diag; 1729566063dSJacob Faibussowitsch PetscCall(VecSetValues(y, nboundary_nodes * bs, boundary_indices, boundary_values, INSERT_VALUES)); 1739566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(y)); 1749566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(y)); 175c4762a1bSJed Brown 1769566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n")); 1779566063dSJacob Faibussowitsch PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 1789566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD)); 179c4762a1bSJed Brown 1809566063dSJacob Faibussowitsch PetscCall(MatZeroRowsColumns(A, nboundary_nodes * bs, boundary_indices, diag, x, rhs)); 1819566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n")); 1829566063dSJacob Faibussowitsch PetscCall(VecView(rhs, PETSC_VIEWER_STDOUT_WORLD)); 1839566063dSJacob Faibussowitsch PetscCall(VecAXPY(y, -1.0, rhs)); 1849566063dSJacob Faibussowitsch PetscCall(VecNorm(y, NORM_INFINITY, &norm)); 185c4762a1bSJed Brown if (norm > 1.0e-10) { 1869566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm)); 1879566063dSJacob Faibussowitsch PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD)); 188c4762a1bSJed Brown SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns"); 189c4762a1bSJed Brown } 190c4762a1bSJed Brown 1919566063dSJacob Faibussowitsch PetscCall(PetscFree(boundary_nodes)); 1929566063dSJacob Faibussowitsch PetscCall(PetscFree2(boundary_indices, boundary_values)); 1939566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 1949566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y)); 1959566063dSJacob Faibussowitsch PetscCall(VecDestroy(&rhs)); 1969566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 197c4762a1bSJed Brown 1989566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 199b122ec5aSJacob Faibussowitsch return 0; 200c4762a1bSJed Brown } 201c4762a1bSJed Brown 202c4762a1bSJed Brown /*TEST 203c4762a1bSJed Brown 204c4762a1bSJed Brown test: 2059fd945daSStefano Zampini diff_args: -j 206c4762a1bSJed Brown suffix: 0 207c4762a1bSJed Brown 208c4762a1bSJed Brown test: 2099fd945daSStefano Zampini diff_args: -j 210c4762a1bSJed Brown suffix: 1 211c4762a1bSJed Brown nsize: 2 212c4762a1bSJed Brown 213c4762a1bSJed Brown test: 2149fd945daSStefano Zampini diff_args: -j 215c4762a1bSJed Brown suffix: 10 216c4762a1bSJed Brown nsize: 2 217c4762a1bSJed Brown args: -bs 2 -nonlocal_bc 218c4762a1bSJed Brown 219c4762a1bSJed Brown test: 2209fd945daSStefano Zampini diff_args: -j 221c4762a1bSJed Brown suffix: 11 222c4762a1bSJed Brown nsize: 7 223c4762a1bSJed Brown args: -bs 2 -nonlocal_bc 224c4762a1bSJed Brown 225c4762a1bSJed Brown test: 2269fd945daSStefano Zampini diff_args: -j 227c4762a1bSJed Brown suffix: 12 228c4762a1bSJed Brown args: -bs 2 -nonlocal_bc -mat_type baij 229c4762a1bSJed Brown 230c4762a1bSJed Brown test: 2319fd945daSStefano Zampini diff_args: -j 232c4762a1bSJed Brown suffix: 13 233c4762a1bSJed Brown nsize: 2 234c4762a1bSJed Brown args: -bs 2 -nonlocal_bc -mat_type baij 235c4762a1bSJed Brown 236c4762a1bSJed Brown test: 2379fd945daSStefano Zampini diff_args: -j 238c4762a1bSJed Brown suffix: 14 239c4762a1bSJed Brown nsize: 7 240c4762a1bSJed Brown args: -bs 2 -nonlocal_bc -mat_type baij 241c4762a1bSJed Brown 242c4762a1bSJed Brown test: 2439fd945daSStefano Zampini diff_args: -j 244c4762a1bSJed Brown suffix: 2 245c4762a1bSJed Brown nsize: 7 246c4762a1bSJed Brown 247c4762a1bSJed Brown test: 2489fd945daSStefano Zampini diff_args: -j 249c4762a1bSJed Brown suffix: 3 250c4762a1bSJed Brown args: -mat_type baij 251c4762a1bSJed Brown 252c4762a1bSJed Brown test: 2539fd945daSStefano Zampini diff_args: -j 254c4762a1bSJed Brown suffix: 4 255c4762a1bSJed Brown nsize: 2 256c4762a1bSJed Brown args: -mat_type baij 257c4762a1bSJed Brown 258c4762a1bSJed Brown test: 2599fd945daSStefano Zampini diff_args: -j 260c4762a1bSJed Brown suffix: 5 261c4762a1bSJed Brown nsize: 7 262c4762a1bSJed Brown args: -mat_type baij 263c4762a1bSJed Brown 264c4762a1bSJed Brown test: 2659fd945daSStefano Zampini diff_args: -j 266c4762a1bSJed Brown suffix: 6 267c4762a1bSJed Brown args: -bs 2 -mat_type baij 268c4762a1bSJed Brown 269c4762a1bSJed Brown test: 2709fd945daSStefano Zampini diff_args: -j 271c4762a1bSJed Brown suffix: 7 272c4762a1bSJed Brown nsize: 2 273c4762a1bSJed Brown args: -bs 2 -mat_type baij 274c4762a1bSJed Brown 275c4762a1bSJed Brown test: 2769fd945daSStefano Zampini diff_args: -j 277c4762a1bSJed Brown suffix: 8 278c4762a1bSJed Brown nsize: 7 279c4762a1bSJed Brown args: -bs 2 -mat_type baij 280c4762a1bSJed Brown 281c4762a1bSJed Brown test: 2829fd945daSStefano Zampini diff_args: -j 283c4762a1bSJed Brown suffix: 9 284c4762a1bSJed Brown args: -bs 2 -nonlocal_bc 285c4762a1bSJed Brown 286c4762a1bSJed Brown test: 2879fd945daSStefano Zampini diff_args: -j 288c4762a1bSJed Brown suffix: 15 289c4762a1bSJed Brown args: -bs 2 -nonlocal_bc -convname shell 290c4762a1bSJed Brown 291c4762a1bSJed Brown test: 2929fd945daSStefano Zampini diff_args: -j 293c4762a1bSJed Brown suffix: 16 294c4762a1bSJed Brown nsize: 2 295c4762a1bSJed Brown args: -bs 2 -nonlocal_bc -convname shell 296c4762a1bSJed Brown 297c4762a1bSJed Brown test: 2989fd945daSStefano Zampini diff_args: -j 299c4762a1bSJed Brown suffix: 17 300c4762a1bSJed Brown args: -bs 2 -nonlocal_bc -convname dense 301c4762a1bSJed Brown 302c4762a1bSJed Brown testset: 3039fd945daSStefano Zampini diff_args: -j 304c4762a1bSJed Brown suffix: full 305c4762a1bSJed Brown nsize: {{1 3}separate output} 306c4762a1bSJed Brown args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0 30738a8e8c1SStefano Zampini 30838a8e8c1SStefano Zampini test: 3099fd945daSStefano Zampini diff_args: -j 31038a8e8c1SStefano Zampini requires: cuda 31138a8e8c1SStefano Zampini suffix: cusparse_1 31238a8e8c1SStefano Zampini nsize: 1 31338a8e8c1SStefano Zampini args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output} 31438a8e8c1SStefano Zampini 31538a8e8c1SStefano Zampini test: 3169fd945daSStefano Zampini diff_args: -j 31738a8e8c1SStefano Zampini requires: cuda 31838a8e8c1SStefano Zampini suffix: cusparse_3 31938a8e8c1SStefano Zampini nsize: 3 32038a8e8c1SStefano Zampini args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse 32138a8e8c1SStefano Zampini 322c4762a1bSJed Brown TEST*/ 323