1c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscmat.h> 4c4762a1bSJed Brown 5c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */ 6c4762a1bSJed Brown int global_index(PetscInt i,PetscInt j,PetscInt k, PetscInt m, PetscInt n) { return i + j * m + k * m * n; } 7c4762a1bSJed Brown 8c4762a1bSJed Brown int main(int argc,char **argv) 9c4762a1bSJed Brown { 10c4762a1bSJed Brown Mat A,B,C,PtAP,PtAP_copy,PtAP_squared; 11c4762a1bSJed Brown PetscInt i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1; 12c4762a1bSJed Brown PetscScalar v; 13c4762a1bSJed Brown PetscErrorCode ierr; 14c4762a1bSJed Brown PetscBool equal=PETSC_FALSE,mat_view=PETSC_FALSE; 15c4762a1bSJed Brown char stencil[PETSC_MAX_PATH_LEN]; 16c4762a1bSJed Brown #if defined(PETSC_USE_LOG) 17c4762a1bSJed Brown PetscLogStage fullMatMatMultStage; 18c4762a1bSJed Brown #endif 19c4762a1bSJed Brown 20c4762a1bSJed Brown ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 21c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); 22c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); 23c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL);CHKERRQ(ierr); 24c4762a1bSJed Brown ierr = PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view);CHKERRQ(ierr); 25589a23caSBarry Smith ierr = PetscOptionsGetString(NULL,NULL,"-stencil",stencil,sizeof(stencil),NULL);CHKERRQ(ierr); 26c4762a1bSJed Brown 27c4762a1bSJed Brown /* Create a aij matrix A */ 28c4762a1bSJed Brown M = N = m*n*o; 29c4762a1bSJed Brown ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); 30c4762a1bSJed Brown ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 31c4762a1bSJed Brown ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr); 32c4762a1bSJed Brown ierr = MatSetFromOptions(A);CHKERRQ(ierr); 33c4762a1bSJed Brown 34c4762a1bSJed Brown /* Consistency checks */ 35*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(o < 1 || m < 1 || n < 1,PETSC_COMM_WORLD,PETSC_ERR_USER,"Dimensions need to be larger than zero!"); 36c4762a1bSJed Brown 37c4762a1bSJed Brown /************ 2D stencils ***************/ 38c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "2d5point", &equal);CHKERRQ(ierr); 39c4762a1bSJed Brown if (equal) { /* 5-point stencil, 2D */ 40c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr); 41c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr); 42c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 43c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 44c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 45c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 46c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 47c4762a1bSJed Brown if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 48c4762a1bSJed Brown if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 49c4762a1bSJed Brown v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 50c4762a1bSJed Brown } 51c4762a1bSJed Brown } 52c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "2d9point", &equal);CHKERRQ(ierr); 53c4762a1bSJed Brown if (equal) { /* 9-point stencil, 2D */ 54c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr); 55c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr); 56c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 57c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 58c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 59c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j, k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 60c4762a1bSJed Brown if (i>0 && j>0) {J = global_index(i-1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 61c4762a1bSJed Brown if (j>0) {J = global_index(i, j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 62c4762a1bSJed Brown if (i<m-1 && j>0) {J = global_index(i+1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 63c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j, k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 64c4762a1bSJed Brown if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 65c4762a1bSJed Brown if (j<n-1) {J = global_index(i, j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 66c4762a1bSJed Brown if (i>0 && j<n-1) {J = global_index(i-1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 67c4762a1bSJed Brown v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 68c4762a1bSJed Brown } 69c4762a1bSJed Brown } 70c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "2d9point2", &equal);CHKERRQ(ierr); 71c4762a1bSJed Brown if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ 72c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr); 73c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr); 74c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 75c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 76c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 77c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 78c4762a1bSJed Brown if (i>1) {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 79c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 80c4762a1bSJed Brown if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 81c4762a1bSJed Brown if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 82c4762a1bSJed Brown if (j>1) {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 83c4762a1bSJed Brown if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 84c4762a1bSJed Brown if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 85c4762a1bSJed Brown v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 86c4762a1bSJed Brown } 87c4762a1bSJed Brown } 88c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "2d13point", &equal);CHKERRQ(ierr); 89c4762a1bSJed Brown if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ 90c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr); 91c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr); 92c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 93c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 94c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 95c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 96c4762a1bSJed Brown if (i>1) {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 97c4762a1bSJed Brown if (i>2) {J = global_index(i-3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 98c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 99c4762a1bSJed Brown if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 100c4762a1bSJed Brown if (i<m-3) {J = global_index(i+3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 101c4762a1bSJed Brown if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 102c4762a1bSJed Brown if (j>1) {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 103c4762a1bSJed Brown if (j>2) {J = global_index(i,j-3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 104c4762a1bSJed Brown if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 105c4762a1bSJed Brown if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 106c4762a1bSJed Brown if (j<n-3) {J = global_index(i,j+3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 107c4762a1bSJed Brown v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 108c4762a1bSJed Brown } 109c4762a1bSJed Brown } 110c4762a1bSJed Brown /************ 3D stencils ***************/ 111c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "3d7point", &equal);CHKERRQ(ierr); 112c4762a1bSJed Brown if (equal) { /* 7-point stencil, 3D */ 113c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,7,NULL,7,NULL);CHKERRQ(ierr); 114c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,7,NULL);CHKERRQ(ierr); 115c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 116c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 117c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 118c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 119c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 120c4762a1bSJed Brown if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 121c4762a1bSJed Brown if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 122c4762a1bSJed Brown if (k>0) {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 123c4762a1bSJed Brown if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 124c4762a1bSJed Brown v = 6.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 125c4762a1bSJed Brown } 126c4762a1bSJed Brown } 127c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "3d13point", &equal);CHKERRQ(ierr); 128c4762a1bSJed Brown if (equal) { /* 13-point stencil, 3D */ 129c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr); 130c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr); 131c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 132c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 133c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 134c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 135c4762a1bSJed Brown if (i>1) {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 136c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 137c4762a1bSJed Brown if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 138c4762a1bSJed Brown if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 139c4762a1bSJed Brown if (j>1) {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 140c4762a1bSJed Brown if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 141c4762a1bSJed Brown if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 142c4762a1bSJed Brown if (k>0) {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 143c4762a1bSJed Brown if (k>1) {J = global_index(i,j,k-2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 144c4762a1bSJed Brown if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 145c4762a1bSJed Brown if (k<o-2) {J = global_index(i,j,k+2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 146c4762a1bSJed Brown v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 147c4762a1bSJed Brown } 148c4762a1bSJed Brown } 149c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "3d19point", &equal);CHKERRQ(ierr); 150c4762a1bSJed Brown if (equal) { /* 19-point stencil, 3D */ 151c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,19,NULL,19,NULL);CHKERRQ(ierr); 152c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,19,NULL);CHKERRQ(ierr); 153c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 154c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 155c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 156c4762a1bSJed Brown /* one hop */ 157c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 158c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 159c4762a1bSJed Brown if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 160c4762a1bSJed Brown if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 161c4762a1bSJed Brown if (k>0) {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 162c4762a1bSJed Brown if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 163c4762a1bSJed Brown /* two hops */ 164c4762a1bSJed Brown if (i>0 && j>0) {J = global_index(i-1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 165c4762a1bSJed Brown if (i>0 && k>0) {J = global_index(i-1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 166c4762a1bSJed Brown if (i>0 && j<n-1) {J = global_index(i-1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 167c4762a1bSJed Brown if (i>0 && k<o-1) {J = global_index(i-1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 168c4762a1bSJed Brown if (i<m-1 && j>0) {J = global_index(i+1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 169c4762a1bSJed Brown if (i<m-1 && k>0) {J = global_index(i+1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 170c4762a1bSJed Brown if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 171c4762a1bSJed Brown if (i<m-1 && k<o-1) {J = global_index(i+1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 172c4762a1bSJed Brown if (j>0 && k>0) {J = global_index(i, j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 173c4762a1bSJed Brown if (j>0 && k<o-1) {J = global_index(i, j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 174c4762a1bSJed Brown if (j<n-1 && k>0) {J = global_index(i, j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 175c4762a1bSJed Brown if (j<n-1 && k<o-1) {J = global_index(i, j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 176c4762a1bSJed Brown v = 18.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 177c4762a1bSJed Brown } 178c4762a1bSJed Brown } 179c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "3d27point", &equal);CHKERRQ(ierr); 180c4762a1bSJed Brown if (equal) { /* 27-point stencil, 3D */ 181c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,27,NULL,27,NULL);CHKERRQ(ierr); 182c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,27,NULL);CHKERRQ(ierr); 183c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 184c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 185c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 186c4762a1bSJed Brown if (k>0) { 187c4762a1bSJed Brown if (j>0) { 188c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 189c4762a1bSJed Brown J = global_index(i, j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 190c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 191c4762a1bSJed Brown } 192c4762a1bSJed Brown { 193c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 194c4762a1bSJed Brown J = global_index(i, j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 195c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 196c4762a1bSJed Brown } 197c4762a1bSJed Brown if (j<n-1) { 198c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 199c4762a1bSJed Brown J = global_index(i, j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 200c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 201c4762a1bSJed Brown } 202c4762a1bSJed Brown } 203c4762a1bSJed Brown { 204c4762a1bSJed Brown if (j>0) { 205c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 206c4762a1bSJed Brown J = global_index(i, j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 207c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 208c4762a1bSJed Brown } 209c4762a1bSJed Brown { 210c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j, k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 211c4762a1bSJed Brown J = global_index(i, j, k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 212c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j, k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 213c4762a1bSJed Brown } 214c4762a1bSJed Brown if (j<n-1) { 215c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 216c4762a1bSJed Brown J = global_index(i, j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 217c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 218c4762a1bSJed Brown } 219c4762a1bSJed Brown } 220c4762a1bSJed Brown if (k<o-1) { 221c4762a1bSJed Brown if (j>0) { 222c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 223c4762a1bSJed Brown J = global_index(i, j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 224c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 225c4762a1bSJed Brown } 226c4762a1bSJed Brown { 227c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 228c4762a1bSJed Brown J = global_index(i, j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 229c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 230c4762a1bSJed Brown } 231c4762a1bSJed Brown if (j<n-1) { 232c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 233c4762a1bSJed Brown J = global_index(i, j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 234c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 235c4762a1bSJed Brown } 236c4762a1bSJed Brown } 237c4762a1bSJed Brown v = 26.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 238c4762a1bSJed Brown } 239c4762a1bSJed Brown } 240c4762a1bSJed Brown ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 241c4762a1bSJed Brown ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 242c4762a1bSJed Brown 243c4762a1bSJed Brown /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ 244c4762a1bSJed Brown ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 245c4762a1bSJed Brown 246c4762a1bSJed Brown ierr = PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage);CHKERRQ(ierr); 247c4762a1bSJed Brown 248c4762a1bSJed Brown /* Test C = A*B */ 249c4762a1bSJed Brown ierr = PetscLogStagePush(fullMatMatMultStage);CHKERRQ(ierr); 250c4762a1bSJed Brown ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr); 251c4762a1bSJed Brown 252c4762a1bSJed Brown /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ 253c4762a1bSJed Brown ierr = MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP);CHKERRQ(ierr); 254c4762a1bSJed Brown ierr = MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy);CHKERRQ(ierr); 255c4762a1bSJed Brown ierr = MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared);CHKERRQ(ierr); 256c4762a1bSJed Brown 257c4762a1bSJed Brown ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 258c4762a1bSJed Brown ierr = MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 259c4762a1bSJed Brown 260c4762a1bSJed Brown ierr = MatDestroy(&PtAP_squared);CHKERRQ(ierr); 261c4762a1bSJed Brown ierr = MatDestroy(&PtAP_copy);CHKERRQ(ierr); 262c4762a1bSJed Brown ierr = MatDestroy(&PtAP);CHKERRQ(ierr); 263c4762a1bSJed Brown ierr = MatDestroy(&C);CHKERRQ(ierr); 264c4762a1bSJed Brown ierr = MatDestroy(&B);CHKERRQ(ierr); 265c4762a1bSJed Brown ierr = MatDestroy(&A);CHKERRQ(ierr); 266c4762a1bSJed Brown ierr = PetscFinalize(); 267c4762a1bSJed Brown return ierr; 268c4762a1bSJed Brown } 269c4762a1bSJed Brown 270c4762a1bSJed Brown /*TEST 271c4762a1bSJed Brown 272c4762a1bSJed Brown test: 273c4762a1bSJed Brown suffix: 1 274c4762a1bSJed Brown nsize: 1 275c20d7725SJed Brown args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted 276c4762a1bSJed Brown 277c4762a1bSJed Brown test: 278c4762a1bSJed Brown suffix: 2 279c4762a1bSJed Brown nsize: 1 280c4762a1bSJed Brown args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge 281c4762a1bSJed Brown 282c4762a1bSJed Brown test: 283c4762a1bSJed Brown suffix: 3 284c4762a1bSJed Brown nsize: 4 285c4762a1bSJed Brown args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi 286c4762a1bSJed Brown 287c4762a1bSJed Brown TEST*/ 288