1c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscmat.h> 4c4762a1bSJed Brown 5c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */ 6c4762a1bSJed Brown int global_index(PetscInt i,PetscInt j,PetscInt k, PetscInt m, PetscInt n) { return i + j * m + k * m * n; } 7c4762a1bSJed Brown 8c4762a1bSJed Brown int main(int argc,char **argv) 9c4762a1bSJed Brown { 10c4762a1bSJed Brown Mat A,B,C,PtAP,PtAP_copy,PtAP_squared; 11c4762a1bSJed Brown PetscInt i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1; 12c4762a1bSJed Brown PetscScalar v; 13c4762a1bSJed Brown PetscBool equal=PETSC_FALSE,mat_view=PETSC_FALSE; 14c4762a1bSJed Brown char stencil[PETSC_MAX_PATH_LEN]; 15c4762a1bSJed Brown #if defined(PETSC_USE_LOG) 16c4762a1bSJed Brown PetscLogStage fullMatMatMultStage; 17c4762a1bSJed Brown #endif 18c4762a1bSJed Brown 19*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscInitialize(&argc,&argv,(char*)0,help)); 205f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL)); 215f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL)); 225f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL)); 235f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view)); 245f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetString(NULL,NULL,"-stencil",stencil,sizeof(stencil),NULL)); 25c4762a1bSJed Brown 26c4762a1bSJed Brown /* Create a aij matrix A */ 27c4762a1bSJed Brown M = N = m*n*o; 285f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(PETSC_COMM_WORLD,&A)); 295f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N)); 305f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(A,MATAIJ)); 315f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetFromOptions(A)); 32c4762a1bSJed Brown 33c4762a1bSJed Brown /* Consistency checks */ 342c71b3e2SJacob Faibussowitsch PetscCheckFalse(o < 1 || m < 1 || n < 1,PETSC_COMM_WORLD,PETSC_ERR_USER,"Dimensions need to be larger than zero!"); 35c4762a1bSJed Brown 36c4762a1bSJed Brown /************ 2D stencils ***************/ 375f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "2d5point", &equal)); 38c4762a1bSJed Brown if (equal) { /* 5-point stencil, 2D */ 395f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,5,NULL,5,NULL)); 405f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,5,NULL)); 415f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 42c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 43c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 445f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 455f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 465f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 475f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 485f80ce2aSJacob Faibussowitsch v = 4.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 49c4762a1bSJed Brown } 50c4762a1bSJed Brown } 515f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "2d9point", &equal)); 52c4762a1bSJed Brown if (equal) { /* 9-point stencil, 2D */ 535f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,9,NULL,9,NULL)); 545f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,9,NULL)); 555f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 56c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 57c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 585f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j, k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 595f80ce2aSJacob Faibussowitsch if (i>0 && j>0) {J = global_index(i-1,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 605f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i, j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 615f80ce2aSJacob Faibussowitsch if (i<m-1 && j>0) {J = global_index(i+1,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 625f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j, k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 635f80ce2aSJacob Faibussowitsch if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 645f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i, j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 655f80ce2aSJacob Faibussowitsch if (i>0 && j<n-1) {J = global_index(i-1,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 665f80ce2aSJacob Faibussowitsch v = 8.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 67c4762a1bSJed Brown } 68c4762a1bSJed Brown } 695f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "2d9point2", &equal)); 70c4762a1bSJed Brown if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ 715f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,9,NULL,9,NULL)); 725f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,9,NULL)); 735f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 74c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 75c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 765f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 775f80ce2aSJacob Faibussowitsch if (i>1) {J = global_index(i-2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 785f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 795f80ce2aSJacob Faibussowitsch if (i<m-2) {J = global_index(i+2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 805f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 815f80ce2aSJacob Faibussowitsch if (j>1) {J = global_index(i,j-2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 825f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 835f80ce2aSJacob Faibussowitsch if (j<n-2) {J = global_index(i,j+2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 845f80ce2aSJacob Faibussowitsch v = 8.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 85c4762a1bSJed Brown } 86c4762a1bSJed Brown } 875f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "2d13point", &equal)); 88c4762a1bSJed Brown if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ 895f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,13,NULL,13,NULL)); 905f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,13,NULL)); 915f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 92c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 93c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 945f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 955f80ce2aSJacob Faibussowitsch if (i>1) {J = global_index(i-2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 965f80ce2aSJacob Faibussowitsch if (i>2) {J = global_index(i-3,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 975f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 985f80ce2aSJacob Faibussowitsch if (i<m-2) {J = global_index(i+2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 995f80ce2aSJacob Faibussowitsch if (i<m-3) {J = global_index(i+3,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1005f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1015f80ce2aSJacob Faibussowitsch if (j>1) {J = global_index(i,j-2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1025f80ce2aSJacob Faibussowitsch if (j>2) {J = global_index(i,j-3,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1035f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1045f80ce2aSJacob Faibussowitsch if (j<n-2) {J = global_index(i,j+2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1055f80ce2aSJacob Faibussowitsch if (j<n-3) {J = global_index(i,j+3,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1065f80ce2aSJacob Faibussowitsch v = 12.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 107c4762a1bSJed Brown } 108c4762a1bSJed Brown } 109c4762a1bSJed Brown /************ 3D stencils ***************/ 1105f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "3d7point", &equal)); 111c4762a1bSJed Brown if (equal) { /* 7-point stencil, 3D */ 1125f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,7,NULL,7,NULL)); 1135f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,7,NULL)); 1145f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 115c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 116c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 1175f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1185f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1195f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1205f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1215f80ce2aSJacob Faibussowitsch if (k>0) {J = global_index(i,j,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1225f80ce2aSJacob Faibussowitsch if (k<o-1) {J = global_index(i,j,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1235f80ce2aSJacob Faibussowitsch v = 6.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 124c4762a1bSJed Brown } 125c4762a1bSJed Brown } 1265f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "3d13point", &equal)); 127c4762a1bSJed Brown if (equal) { /* 13-point stencil, 3D */ 1285f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,13,NULL,13,NULL)); 1295f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,13,NULL)); 1305f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 131c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 132c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 1335f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1345f80ce2aSJacob Faibussowitsch if (i>1) {J = global_index(i-2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1355f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1365f80ce2aSJacob Faibussowitsch if (i<m-2) {J = global_index(i+2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1375f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1385f80ce2aSJacob Faibussowitsch if (j>1) {J = global_index(i,j-2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1395f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1405f80ce2aSJacob Faibussowitsch if (j<n-2) {J = global_index(i,j+2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1415f80ce2aSJacob Faibussowitsch if (k>0) {J = global_index(i,j,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1425f80ce2aSJacob Faibussowitsch if (k>1) {J = global_index(i,j,k-2,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1435f80ce2aSJacob Faibussowitsch if (k<o-1) {J = global_index(i,j,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1445f80ce2aSJacob Faibussowitsch if (k<o-2) {J = global_index(i,j,k+2,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1455f80ce2aSJacob Faibussowitsch v = 12.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 146c4762a1bSJed Brown } 147c4762a1bSJed Brown } 1485f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "3d19point", &equal)); 149c4762a1bSJed Brown if (equal) { /* 19-point stencil, 3D */ 1505f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,19,NULL,19,NULL)); 1515f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,19,NULL)); 1525f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 153c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 154c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 155c4762a1bSJed Brown /* one hop */ 1565f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1575f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1585f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1595f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1605f80ce2aSJacob Faibussowitsch if (k>0) {J = global_index(i,j,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1615f80ce2aSJacob Faibussowitsch if (k<o-1) {J = global_index(i,j,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 162c4762a1bSJed Brown /* two hops */ 1635f80ce2aSJacob Faibussowitsch if (i>0 && j>0) {J = global_index(i-1,j-1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1645f80ce2aSJacob Faibussowitsch if (i>0 && k>0) {J = global_index(i-1,j, k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1655f80ce2aSJacob Faibussowitsch if (i>0 && j<n-1) {J = global_index(i-1,j+1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1665f80ce2aSJacob Faibussowitsch if (i>0 && k<o-1) {J = global_index(i-1,j, k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1675f80ce2aSJacob Faibussowitsch if (i<m-1 && j>0) {J = global_index(i+1,j-1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1685f80ce2aSJacob Faibussowitsch if (i<m-1 && k>0) {J = global_index(i+1,j, k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1695f80ce2aSJacob Faibussowitsch if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1705f80ce2aSJacob Faibussowitsch if (i<m-1 && k<o-1) {J = global_index(i+1,j, k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1715f80ce2aSJacob Faibussowitsch if (j>0 && k>0) {J = global_index(i, j-1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1725f80ce2aSJacob Faibussowitsch if (j>0 && k<o-1) {J = global_index(i, j-1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1735f80ce2aSJacob Faibussowitsch if (j<n-1 && k>0) {J = global_index(i, j+1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1745f80ce2aSJacob Faibussowitsch if (j<n-1 && k<o-1) {J = global_index(i, j+1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1755f80ce2aSJacob Faibussowitsch v = 18.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 176c4762a1bSJed Brown } 177c4762a1bSJed Brown } 1785f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "3d27point", &equal)); 179c4762a1bSJed Brown if (equal) { /* 27-point stencil, 3D */ 1805f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,27,NULL,27,NULL)); 1815f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,27,NULL)); 1825f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 183c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 184c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 185c4762a1bSJed Brown if (k>0) { 186c4762a1bSJed Brown if (j>0) { 1875f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j-1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1885f80ce2aSJacob Faibussowitsch J = global_index(i, j-1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 1895f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 190c4762a1bSJed Brown } 191c4762a1bSJed Brown { 1925f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j, k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1935f80ce2aSJacob Faibussowitsch J = global_index(i, j, k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 1945f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j, k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 195c4762a1bSJed Brown } 196c4762a1bSJed Brown if (j<n-1) { 1975f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j+1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1985f80ce2aSJacob Faibussowitsch J = global_index(i, j+1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 1995f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 200c4762a1bSJed Brown } 201c4762a1bSJed Brown } 202c4762a1bSJed Brown { 203c4762a1bSJed Brown if (j>0) { 2045f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j-1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 2055f80ce2aSJacob Faibussowitsch J = global_index(i, j-1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2065f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j-1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 207c4762a1bSJed Brown } 208c4762a1bSJed Brown { 2095f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j, k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 2105f80ce2aSJacob Faibussowitsch J = global_index(i, j, k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2115f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j, k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 212c4762a1bSJed Brown } 213c4762a1bSJed Brown if (j<n-1) { 2145f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j+1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 2155f80ce2aSJacob Faibussowitsch J = global_index(i, j+1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2165f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j+1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 217c4762a1bSJed Brown } 218c4762a1bSJed Brown } 219c4762a1bSJed Brown if (k<o-1) { 220c4762a1bSJed Brown if (j>0) { 2215f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j-1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 2225f80ce2aSJacob Faibussowitsch J = global_index(i, j-1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2235f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 224c4762a1bSJed Brown } 225c4762a1bSJed Brown { 2265f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j, k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 2275f80ce2aSJacob Faibussowitsch J = global_index(i, j, k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2285f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j, k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 229c4762a1bSJed Brown } 230c4762a1bSJed Brown if (j<n-1) { 2315f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j+1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 2325f80ce2aSJacob Faibussowitsch J = global_index(i, j+1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2335f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 234c4762a1bSJed Brown } 235c4762a1bSJed Brown } 2365f80ce2aSJacob Faibussowitsch v = 26.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 237c4762a1bSJed Brown } 238c4762a1bSJed Brown } 2395f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 2405f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 241c4762a1bSJed Brown 242c4762a1bSJed Brown /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ 2435f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(A,MAT_COPY_VALUES,&B)); 244c4762a1bSJed Brown 2455f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage)); 246c4762a1bSJed Brown 247c4762a1bSJed Brown /* Test C = A*B */ 2485f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogStagePush(fullMatMatMultStage)); 2495f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C)); 250c4762a1bSJed Brown 251c4762a1bSJed Brown /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ 2525f80ce2aSJacob Faibussowitsch CHKERRQ(MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP)); 2535f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy)); 2545f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared)); 255c4762a1bSJed Brown 2565f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(C,PETSC_VIEWER_STDOUT_WORLD)); 2575f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD)); 258c4762a1bSJed Brown 2595f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&PtAP_squared)); 2605f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&PtAP_copy)); 2615f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&PtAP)); 2625f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&C)); 2635f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&B)); 2645f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A)); 265*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscFinalize()); 266*b122ec5aSJacob Faibussowitsch return 0; 267c4762a1bSJed Brown } 268c4762a1bSJed Brown 269c4762a1bSJed Brown /*TEST 270c4762a1bSJed Brown 271c4762a1bSJed Brown test: 272c4762a1bSJed Brown suffix: 1 273c4762a1bSJed Brown nsize: 1 274c20d7725SJed Brown args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted 275c4762a1bSJed Brown 276c4762a1bSJed Brown test: 277c4762a1bSJed Brown suffix: 2 278c4762a1bSJed Brown nsize: 1 279c4762a1bSJed Brown args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge 280c4762a1bSJed Brown 281c4762a1bSJed Brown test: 282c4762a1bSJed Brown suffix: 3 283c4762a1bSJed Brown nsize: 4 284c4762a1bSJed Brown args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi 285c4762a1bSJed Brown 286c4762a1bSJed Brown TEST*/ 287