1c4762a1bSJed Brown static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\ 2c4762a1bSJed Brown Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n"; 3c4762a1bSJed Brown 4c4762a1bSJed Brown #include <petscmat.h> 5c4762a1bSJed Brown 6c4762a1bSJed Brown int main(int argc,char **args) 7c4762a1bSJed Brown { 8c4762a1bSJed Brown Mat A; 9c4762a1bSJed Brown Vec x, rhs, y; 10c4762a1bSJed Brown PetscInt i,j,k,b,m = 3,n,nlocal=2,bs=1,Ii,J; 11c4762a1bSJed Brown PetscInt *boundary_nodes, nboundary_nodes, *boundary_indices; 12c4762a1bSJed Brown PetscMPIInt rank,size; 13c4762a1bSJed Brown PetscScalar v,v0,v1,v2,a0=0.1,a,rhsval, *boundary_values,diag = 1.0; 14c4762a1bSJed Brown PetscReal norm; 15c4762a1bSJed Brown char convname[64]; 16c4762a1bSJed Brown PetscBool upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE; 17c4762a1bSJed Brown 18*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscInitialize(&argc,&args,(char*)0,help)); 195f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank)); 205f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 21c4762a1bSJed Brown n = nlocal*size; 22c4762a1bSJed Brown 235f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetInt(NULL,NULL, "-bs", &bs, NULL)); 245f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetBool(NULL,NULL, "-nonlocal_bc", &nonlocalBC, NULL)); 255f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetScalar(NULL,NULL, "-diag", &diag, NULL)); 265f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetString(NULL,NULL,"-convname",convname,sizeof(convname),&convert)); 275f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetBool(NULL,NULL, "-zerorhs", &zerorhs, NULL)); 28c4762a1bSJed Brown 295f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(PETSC_COMM_WORLD,&A)); 305f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n*bs,m*n*bs)); 315f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetFromOptions(A)); 325f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetUp(A)); 33c4762a1bSJed Brown 345f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateVecs(A, NULL, &rhs)); 355f80ce2aSJacob Faibussowitsch CHKERRQ(VecSetFromOptions(rhs)); 365f80ce2aSJacob Faibussowitsch CHKERRQ(VecSetUp(rhs)); 37c4762a1bSJed Brown 38c4762a1bSJed Brown rhsval = 0.0; 39c4762a1bSJed Brown for (i=0; i<m; i++) { 40c4762a1bSJed Brown for (j=nlocal*rank; j<nlocal*(rank+1); j++) { 41c4762a1bSJed Brown a = a0; 42c4762a1bSJed Brown for (b=0; b<bs; b++) { 43c4762a1bSJed Brown /* let's start with a 5-point stencil diffusion term */ 44c4762a1bSJed Brown v = -1.0; Ii = (j + n*i)*bs + b; 455f80ce2aSJacob Faibussowitsch if (i>0) {J = Ii - n*bs; CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));} 465f80ce2aSJacob Faibussowitsch if (i<m-1) {J = Ii + n*bs; CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));} 475f80ce2aSJacob Faibussowitsch if (j>0) {J = Ii - 1*bs; CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));} 485f80ce2aSJacob Faibussowitsch if (j<n-1) {J = Ii + 1*bs; CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));} 495f80ce2aSJacob Faibussowitsch v = 4.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES)); 50c4762a1bSJed Brown if (upwind) { 51c4762a1bSJed Brown /* now add a 2nd order upwind advection term to add a little asymmetry */ 52c4762a1bSJed Brown if (j>2) { 53c4762a1bSJed Brown J = Ii-2*bs; v2 = 0.5*a; v1 = -2.0*a; v0 = 1.5*a; 545f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v2,ADD_VALUES)); 55c4762a1bSJed Brown } else { 56c4762a1bSJed Brown /* fall back to 1st order upwind */ 57c4762a1bSJed Brown v1 = -1.0*a; v0 = 1.0*a; 58c4762a1bSJed Brown }; 595f80ce2aSJacob Faibussowitsch if (j>1) {J = Ii-1*bs; CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v1,ADD_VALUES));} 605f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v0,ADD_VALUES)); 61c4762a1bSJed Brown a /= 10.; /* use a different velocity for the next component */ 62c4762a1bSJed Brown /* add a coupling to the previous and next components */ 63c4762a1bSJed Brown v = 0.5; 645f80ce2aSJacob Faibussowitsch if (b>0) {J = Ii - 1; CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));} 655f80ce2aSJacob Faibussowitsch if (b<bs-1) {J = Ii + 1; CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));} 66c4762a1bSJed Brown } 67c4762a1bSJed Brown /* make up some rhs */ 685f80ce2aSJacob Faibussowitsch CHKERRQ(VecSetValue(rhs, Ii, rhsval, INSERT_VALUES)); 69c4762a1bSJed Brown rhsval += 1.0; 70c4762a1bSJed Brown } 71c4762a1bSJed Brown } 72c4762a1bSJed Brown } 735f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 745f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 75c4762a1bSJed Brown 76c4762a1bSJed Brown if (convert) { /* Test different Mat implementations */ 77c4762a1bSJed Brown Mat B; 78c4762a1bSJed Brown 795f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(A,convname,MAT_INITIAL_MATRIX,&B)); 805f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A)); 81c4762a1bSJed Brown A = B; 82c4762a1bSJed Brown } 83c4762a1bSJed Brown 845f80ce2aSJacob Faibussowitsch CHKERRQ(VecAssemblyBegin(rhs)); 855f80ce2aSJacob Faibussowitsch CHKERRQ(VecAssemblyEnd(rhs)); 86c4762a1bSJed Brown /* set rhs to zero to simplify */ 87c4762a1bSJed Brown if (zerorhs) { 885f80ce2aSJacob Faibussowitsch CHKERRQ(VecZeroEntries(rhs)); 89c4762a1bSJed Brown } 90c4762a1bSJed Brown 91c4762a1bSJed Brown if (nonlocalBC) { 92c4762a1bSJed Brown /*version where boundary conditions are set by processes that don't necessarily own the nodes */ 93dd400576SPatrick Sanan if (rank == 0) { 94c4762a1bSJed Brown nboundary_nodes = size>m ? nlocal : m-size+nlocal; 955f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(nboundary_nodes,&boundary_nodes)); 96c4762a1bSJed Brown k = 0; 97c4762a1bSJed Brown for (i=size; i<m; i++,k++) {boundary_nodes[k] = n*i;}; 98c4762a1bSJed Brown } else if (rank < m) { 99c4762a1bSJed Brown nboundary_nodes = nlocal+1; 1005f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(nboundary_nodes,&boundary_nodes)); 101c4762a1bSJed Brown boundary_nodes[0] = rank*n; 102c4762a1bSJed Brown k = 1; 103c4762a1bSJed Brown } else { 104c4762a1bSJed Brown nboundary_nodes = nlocal; 1055f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(nboundary_nodes,&boundary_nodes)); 106c4762a1bSJed Brown k = 0; 107c4762a1bSJed Brown } 108c4762a1bSJed Brown for (j=nlocal*rank; j<nlocal*(rank+1); j++,k++) {boundary_nodes[k] = j;}; 109c4762a1bSJed Brown } else { 110c4762a1bSJed Brown /*version where boundary conditions are set by the node owners only */ 1115f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(m*n,&boundary_nodes)); 112c4762a1bSJed Brown k=0; 113c4762a1bSJed Brown for (j=0; j<n; j++) { 114c4762a1bSJed Brown Ii = j; 115c4762a1bSJed Brown if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii; 116c4762a1bSJed Brown } 117c4762a1bSJed Brown for (i=1; i<m; i++) { 118c4762a1bSJed Brown Ii = n*i; 119c4762a1bSJed Brown if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii; 120c4762a1bSJed Brown } 121c4762a1bSJed Brown nboundary_nodes = k; 122c4762a1bSJed Brown } 123c4762a1bSJed Brown 1245f80ce2aSJacob Faibussowitsch CHKERRQ(VecDuplicate(rhs, &x)); 1255f80ce2aSJacob Faibussowitsch CHKERRQ(VecZeroEntries(x)); 1265f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(nboundary_nodes*bs,&boundary_indices,nboundary_nodes*bs,&boundary_values)); 127c4762a1bSJed Brown for (k=0; k<nboundary_nodes; k++) { 128c4762a1bSJed Brown Ii = boundary_nodes[k]*bs; 129c4762a1bSJed Brown v = 1.0*boundary_nodes[k]; 130c4762a1bSJed Brown for (b=0; b<bs; b++, Ii++) { 131c4762a1bSJed Brown boundary_indices[k*bs+b] = Ii; 132c4762a1bSJed Brown boundary_values[k*bs+b] = v; 1335f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %" PetscInt_FMT " %f\n", rank, Ii, (double)PetscRealPart(v))); 134c4762a1bSJed Brown v += 0.1; 135c4762a1bSJed Brown } 136c4762a1bSJed Brown } 1375f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL)); 1385f80ce2aSJacob Faibussowitsch CHKERRQ(VecSetValues(x, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES)); 1395f80ce2aSJacob Faibussowitsch CHKERRQ(VecAssemblyBegin(x)); 1405f80ce2aSJacob Faibussowitsch CHKERRQ(VecAssemblyEnd(x)); 141c4762a1bSJed Brown 142c4762a1bSJed Brown /* We can check the rhs returned by MatZeroColumns by computing y=rhs-A*x and overwriting the boundary entries with boundary values */ 1435f80ce2aSJacob Faibussowitsch CHKERRQ(VecDuplicate(x, &y)); 1445f80ce2aSJacob Faibussowitsch CHKERRQ(MatMult(A, x, y)); 1455f80ce2aSJacob Faibussowitsch CHKERRQ(VecAYPX(y, -1.0, rhs)); 146c4762a1bSJed Brown for (k=0; k<nboundary_nodes*bs; k++) boundary_values[k] *= diag; 1475f80ce2aSJacob Faibussowitsch CHKERRQ(VecSetValues(y, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES)); 1485f80ce2aSJacob Faibussowitsch CHKERRQ(VecAssemblyBegin(y)); 1495f80ce2aSJacob Faibussowitsch CHKERRQ(VecAssemblyEnd(y)); 150c4762a1bSJed Brown 1515f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n")); 1525f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 1535f80ce2aSJacob Faibussowitsch CHKERRQ(VecView(x,PETSC_VIEWER_STDOUT_WORLD)); 154c4762a1bSJed Brown 1555f80ce2aSJacob Faibussowitsch CHKERRQ(MatZeroRowsColumns(A, nboundary_nodes*bs, boundary_indices, diag, x, rhs)); 1565f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n")); 1575f80ce2aSJacob Faibussowitsch CHKERRQ(VecView(rhs,PETSC_VIEWER_STDOUT_WORLD)); 1585f80ce2aSJacob Faibussowitsch CHKERRQ(VecAXPY(y, -1.0, rhs)); 1595f80ce2aSJacob Faibussowitsch CHKERRQ(VecNorm(y, NORM_INFINITY, &norm)); 160c4762a1bSJed Brown if (norm > 1.0e-10) { 1615f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm)); 1625f80ce2aSJacob Faibussowitsch CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 163c4762a1bSJed Brown SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns"); 164c4762a1bSJed Brown } 165c4762a1bSJed Brown 1665f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(boundary_nodes)); 1675f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(boundary_indices,boundary_values)); 1685f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&x)); 1695f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&y)); 1705f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&rhs)); 1715f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A)); 172c4762a1bSJed Brown 173*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscFinalize()); 174*b122ec5aSJacob Faibussowitsch return 0; 175c4762a1bSJed Brown } 176c4762a1bSJed Brown 177c4762a1bSJed Brown /*TEST 178c4762a1bSJed Brown 179c4762a1bSJed Brown test: 1809fd945daSStefano Zampini diff_args: -j 181c4762a1bSJed Brown suffix: 0 182c4762a1bSJed Brown 183c4762a1bSJed Brown test: 1849fd945daSStefano Zampini diff_args: -j 185c4762a1bSJed Brown suffix: 1 186c4762a1bSJed Brown nsize: 2 187c4762a1bSJed Brown 188c4762a1bSJed Brown test: 1899fd945daSStefano Zampini diff_args: -j 190c4762a1bSJed Brown suffix: 10 191c4762a1bSJed Brown nsize: 2 192c4762a1bSJed Brown args: -bs 2 -nonlocal_bc 193c4762a1bSJed Brown 194c4762a1bSJed Brown test: 1959fd945daSStefano Zampini diff_args: -j 196c4762a1bSJed Brown suffix: 11 197c4762a1bSJed Brown nsize: 7 198c4762a1bSJed Brown args: -bs 2 -nonlocal_bc 199c4762a1bSJed Brown 200c4762a1bSJed Brown test: 2019fd945daSStefano Zampini diff_args: -j 202c4762a1bSJed Brown suffix: 12 203c4762a1bSJed Brown args: -bs 2 -nonlocal_bc -mat_type baij 204c4762a1bSJed Brown 205c4762a1bSJed Brown test: 2069fd945daSStefano Zampini diff_args: -j 207c4762a1bSJed Brown suffix: 13 208c4762a1bSJed Brown nsize: 2 209c4762a1bSJed Brown args: -bs 2 -nonlocal_bc -mat_type baij 210c4762a1bSJed Brown 211c4762a1bSJed Brown test: 2129fd945daSStefano Zampini diff_args: -j 213c4762a1bSJed Brown suffix: 14 214c4762a1bSJed Brown nsize: 7 215c4762a1bSJed Brown args: -bs 2 -nonlocal_bc -mat_type baij 216c4762a1bSJed Brown 217c4762a1bSJed Brown test: 2189fd945daSStefano Zampini diff_args: -j 219c4762a1bSJed Brown suffix: 2 220c4762a1bSJed Brown nsize: 7 221c4762a1bSJed Brown 222c4762a1bSJed Brown test: 2239fd945daSStefano Zampini diff_args: -j 224c4762a1bSJed Brown suffix: 3 225c4762a1bSJed Brown args: -mat_type baij 226c4762a1bSJed Brown 227c4762a1bSJed Brown test: 2289fd945daSStefano Zampini diff_args: -j 229c4762a1bSJed Brown suffix: 4 230c4762a1bSJed Brown nsize: 2 231c4762a1bSJed Brown args: -mat_type baij 232c4762a1bSJed Brown 233c4762a1bSJed Brown test: 2349fd945daSStefano Zampini diff_args: -j 235c4762a1bSJed Brown suffix: 5 236c4762a1bSJed Brown nsize: 7 237c4762a1bSJed Brown args: -mat_type baij 238c4762a1bSJed Brown 239c4762a1bSJed Brown test: 2409fd945daSStefano Zampini diff_args: -j 241c4762a1bSJed Brown suffix: 6 242c4762a1bSJed Brown args: -bs 2 -mat_type baij 243c4762a1bSJed Brown 244c4762a1bSJed Brown test: 2459fd945daSStefano Zampini diff_args: -j 246c4762a1bSJed Brown suffix: 7 247c4762a1bSJed Brown nsize: 2 248c4762a1bSJed Brown args: -bs 2 -mat_type baij 249c4762a1bSJed Brown 250c4762a1bSJed Brown test: 2519fd945daSStefano Zampini diff_args: -j 252c4762a1bSJed Brown suffix: 8 253c4762a1bSJed Brown nsize: 7 254c4762a1bSJed Brown args: -bs 2 -mat_type baij 255c4762a1bSJed Brown 256c4762a1bSJed Brown test: 2579fd945daSStefano Zampini diff_args: -j 258c4762a1bSJed Brown suffix: 9 259c4762a1bSJed Brown args: -bs 2 -nonlocal_bc 260c4762a1bSJed Brown 261c4762a1bSJed Brown test: 2629fd945daSStefano Zampini diff_args: -j 263c4762a1bSJed Brown suffix: 15 264c4762a1bSJed Brown args: -bs 2 -nonlocal_bc -convname shell 265c4762a1bSJed Brown 266c4762a1bSJed Brown test: 2679fd945daSStefano Zampini diff_args: -j 268c4762a1bSJed Brown suffix: 16 269c4762a1bSJed Brown nsize: 2 270c4762a1bSJed Brown args: -bs 2 -nonlocal_bc -convname shell 271c4762a1bSJed Brown 272c4762a1bSJed Brown test: 2739fd945daSStefano Zampini diff_args: -j 274c4762a1bSJed Brown suffix: 17 275c4762a1bSJed Brown args: -bs 2 -nonlocal_bc -convname dense 276c4762a1bSJed Brown 277c4762a1bSJed Brown testset: 2789fd945daSStefano Zampini diff_args: -j 279c4762a1bSJed Brown suffix: full 280c4762a1bSJed Brown nsize: {{1 3}separate output} 281c4762a1bSJed Brown args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0 28238a8e8c1SStefano Zampini 28338a8e8c1SStefano Zampini test: 2849fd945daSStefano Zampini diff_args: -j 28538a8e8c1SStefano Zampini requires: cuda 28638a8e8c1SStefano Zampini suffix: cusparse_1 28738a8e8c1SStefano Zampini nsize: 1 28838a8e8c1SStefano Zampini args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output} 28938a8e8c1SStefano Zampini 29038a8e8c1SStefano Zampini test: 2919fd945daSStefano Zampini diff_args: -j 29238a8e8c1SStefano Zampini requires: cuda 29338a8e8c1SStefano Zampini suffix: cusparse_3 29438a8e8c1SStefano Zampini nsize: 3 29538a8e8c1SStefano Zampini args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse 29638a8e8c1SStefano Zampini 297c4762a1bSJed Brown TEST*/ 298