1c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscmat.h> 4c4762a1bSJed Brown 5c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */ 6c4762a1bSJed Brown int global_index(PetscInt i,PetscInt j,PetscInt k, PetscInt m, PetscInt n) { return i + j * m + k * m * n; } 7c4762a1bSJed Brown 8c4762a1bSJed Brown int main(int argc,char **argv) 9c4762a1bSJed Brown { 10c4762a1bSJed Brown Mat A,B,C,PtAP,PtAP_copy,PtAP_squared; 11c4762a1bSJed Brown PetscInt i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1; 12c4762a1bSJed Brown PetscScalar v; 13c4762a1bSJed Brown PetscBool equal=PETSC_FALSE,mat_view=PETSC_FALSE; 14c4762a1bSJed Brown char stencil[PETSC_MAX_PATH_LEN]; 15c4762a1bSJed Brown #if defined(PETSC_USE_LOG) 16c4762a1bSJed Brown PetscLogStage fullMatMatMultStage; 17c4762a1bSJed Brown #endif 18c4762a1bSJed Brown 19*327415f7SBarry Smith PetscFunctionBeginUser; 209566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc,&argv,(char*)0,help)); 219566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL)); 229566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL)); 239566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL)); 249566063dSJacob Faibussowitsch PetscCall(PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view)); 259566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL,NULL,"-stencil",stencil,sizeof(stencil),NULL)); 26c4762a1bSJed Brown 27c4762a1bSJed Brown /* Create a aij matrix A */ 28c4762a1bSJed Brown M = N = m*n*o; 299566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD,&A)); 309566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N)); 319566063dSJacob Faibussowitsch PetscCall(MatSetType(A,MATAIJ)); 329566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 33c4762a1bSJed Brown 34c4762a1bSJed Brown /* Consistency checks */ 35bc30f867SBarry Smith PetscCheck(o >= 1 && m > 1 && n >= 1,PETSC_COMM_WORLD,PETSC_ERR_USER,"Dimensions need to be larger than zero!"); 36c4762a1bSJed Brown 37c4762a1bSJed Brown /************ 2D stencils ***************/ 389566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d5point", &equal)); 39c4762a1bSJed Brown if (equal) { /* 5-point stencil, 2D */ 409566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A,5,NULL,5,NULL)); 419566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A,5,NULL)); 429566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&Istart,&Iend)); 43c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 44c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 459566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 469566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 479566063dSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 489566063dSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 499566063dSJacob Faibussowitsch v = 4.0; PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 50c4762a1bSJed Brown } 51c4762a1bSJed Brown } 529566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d9point", &equal)); 53c4762a1bSJed Brown if (equal) { /* 9-point stencil, 2D */ 549566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A,9,NULL,9,NULL)); 559566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A,9,NULL)); 569566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&Istart,&Iend)); 57c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 58c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 599566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j, k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 609566063dSJacob Faibussowitsch if (i>0 && j>0) {J = global_index(i-1,j-1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 619566063dSJacob Faibussowitsch if (j>0) {J = global_index(i, j-1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 629566063dSJacob Faibussowitsch if (i<m-1 && j>0) {J = global_index(i+1,j-1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 639566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j, k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 649566063dSJacob Faibussowitsch if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 659566063dSJacob Faibussowitsch if (j<n-1) {J = global_index(i, j+1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 669566063dSJacob Faibussowitsch if (i>0 && j<n-1) {J = global_index(i-1,j+1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 679566063dSJacob Faibussowitsch v = 8.0; PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 68c4762a1bSJed Brown } 69c4762a1bSJed Brown } 709566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d9point2", &equal)); 71c4762a1bSJed Brown if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ 729566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A,9,NULL,9,NULL)); 739566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A,9,NULL)); 749566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&Istart,&Iend)); 75c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 76c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 779566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 789566063dSJacob Faibussowitsch if (i>1) {J = global_index(i-2,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 799566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 809566063dSJacob Faibussowitsch if (i<m-2) {J = global_index(i+2,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 819566063dSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 829566063dSJacob Faibussowitsch if (j>1) {J = global_index(i,j-2,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 839566063dSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 849566063dSJacob Faibussowitsch if (j<n-2) {J = global_index(i,j+2,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 859566063dSJacob Faibussowitsch v = 8.0; PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 86c4762a1bSJed Brown } 87c4762a1bSJed Brown } 889566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d13point", &equal)); 89c4762a1bSJed Brown if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ 909566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A,13,NULL,13,NULL)); 919566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A,13,NULL)); 929566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&Istart,&Iend)); 93c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 94c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 959566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 969566063dSJacob Faibussowitsch if (i>1) {J = global_index(i-2,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 979566063dSJacob Faibussowitsch if (i>2) {J = global_index(i-3,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 989566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 999566063dSJacob Faibussowitsch if (i<m-2) {J = global_index(i+2,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1009566063dSJacob Faibussowitsch if (i<m-3) {J = global_index(i+3,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1019566063dSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1029566063dSJacob Faibussowitsch if (j>1) {J = global_index(i,j-2,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1039566063dSJacob Faibussowitsch if (j>2) {J = global_index(i,j-3,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1049566063dSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1059566063dSJacob Faibussowitsch if (j<n-2) {J = global_index(i,j+2,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1069566063dSJacob Faibussowitsch if (j<n-3) {J = global_index(i,j+3,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1079566063dSJacob Faibussowitsch v = 12.0; PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 108c4762a1bSJed Brown } 109c4762a1bSJed Brown } 110c4762a1bSJed Brown /************ 3D stencils ***************/ 1119566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d7point", &equal)); 112c4762a1bSJed Brown if (equal) { /* 7-point stencil, 3D */ 1139566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A,7,NULL,7,NULL)); 1149566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A,7,NULL)); 1159566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&Istart,&Iend)); 116c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 117c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 1189566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1199566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1209566063dSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1219566063dSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1229566063dSJacob Faibussowitsch if (k>0) {J = global_index(i,j,k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1239566063dSJacob Faibussowitsch if (k<o-1) {J = global_index(i,j,k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1249566063dSJacob Faibussowitsch v = 6.0; PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 125c4762a1bSJed Brown } 126c4762a1bSJed Brown } 1279566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d13point", &equal)); 128c4762a1bSJed Brown if (equal) { /* 13-point stencil, 3D */ 1299566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A,13,NULL,13,NULL)); 1309566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A,13,NULL)); 1319566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&Istart,&Iend)); 132c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 133c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 1349566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1359566063dSJacob Faibussowitsch if (i>1) {J = global_index(i-2,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1369566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1379566063dSJacob Faibussowitsch if (i<m-2) {J = global_index(i+2,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1389566063dSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1399566063dSJacob Faibussowitsch if (j>1) {J = global_index(i,j-2,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1409566063dSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1419566063dSJacob Faibussowitsch if (j<n-2) {J = global_index(i,j+2,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1429566063dSJacob Faibussowitsch if (k>0) {J = global_index(i,j,k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1439566063dSJacob Faibussowitsch if (k>1) {J = global_index(i,j,k-2,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1449566063dSJacob Faibussowitsch if (k<o-1) {J = global_index(i,j,k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1459566063dSJacob Faibussowitsch if (k<o-2) {J = global_index(i,j,k+2,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1469566063dSJacob Faibussowitsch v = 12.0; PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 147c4762a1bSJed Brown } 148c4762a1bSJed Brown } 1499566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d19point", &equal)); 150c4762a1bSJed Brown if (equal) { /* 19-point stencil, 3D */ 1519566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A,19,NULL,19,NULL)); 1529566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A,19,NULL)); 1539566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&Istart,&Iend)); 154c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 155c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 156c4762a1bSJed Brown /* one hop */ 1579566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1589566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1599566063dSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1609566063dSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1619566063dSJacob Faibussowitsch if (k>0) {J = global_index(i,j,k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1629566063dSJacob Faibussowitsch if (k<o-1) {J = global_index(i,j,k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 163c4762a1bSJed Brown /* two hops */ 1649566063dSJacob Faibussowitsch if (i>0 && j>0) {J = global_index(i-1,j-1,k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1659566063dSJacob Faibussowitsch if (i>0 && k>0) {J = global_index(i-1,j, k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1669566063dSJacob Faibussowitsch if (i>0 && j<n-1) {J = global_index(i-1,j+1,k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1679566063dSJacob Faibussowitsch if (i>0 && k<o-1) {J = global_index(i-1,j, k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1689566063dSJacob Faibussowitsch if (i<m-1 && j>0) {J = global_index(i+1,j-1,k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1699566063dSJacob Faibussowitsch if (i<m-1 && k>0) {J = global_index(i+1,j, k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1709566063dSJacob Faibussowitsch if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1719566063dSJacob Faibussowitsch if (i<m-1 && k<o-1) {J = global_index(i+1,j, k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1729566063dSJacob Faibussowitsch if (j>0 && k>0) {J = global_index(i, j-1,k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1739566063dSJacob Faibussowitsch if (j>0 && k<o-1) {J = global_index(i, j-1,k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1749566063dSJacob Faibussowitsch if (j<n-1 && k>0) {J = global_index(i, j+1,k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1759566063dSJacob Faibussowitsch if (j<n-1 && k<o-1) {J = global_index(i, j+1,k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1769566063dSJacob Faibussowitsch v = 18.0; PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 177c4762a1bSJed Brown } 178c4762a1bSJed Brown } 1799566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d27point", &equal)); 180c4762a1bSJed Brown if (equal) { /* 27-point stencil, 3D */ 1819566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A,27,NULL,27,NULL)); 1829566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A,27,NULL)); 1839566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&Istart,&Iend)); 184c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 185c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 186c4762a1bSJed Brown if (k>0) { 187c4762a1bSJed Brown if (j>0) { 1889566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j-1,k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1899566063dSJacob Faibussowitsch J = global_index(i, j-1,k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 1909566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 191c4762a1bSJed Brown } 192c4762a1bSJed Brown { 1939566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j, k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1949566063dSJacob Faibussowitsch J = global_index(i, j, k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 1959566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j, k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 196c4762a1bSJed Brown } 197c4762a1bSJed Brown if (j<n-1) { 1989566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j+1,k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 1999566063dSJacob Faibussowitsch J = global_index(i, j+1,k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2009566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 201c4762a1bSJed Brown } 202c4762a1bSJed Brown } 203c4762a1bSJed Brown { 204c4762a1bSJed Brown if (j>0) { 2059566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j-1,k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 2069566063dSJacob Faibussowitsch J = global_index(i, j-1,k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2079566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j-1,k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 208c4762a1bSJed Brown } 209c4762a1bSJed Brown { 2109566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j, k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 2119566063dSJacob Faibussowitsch J = global_index(i, j, k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2129566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j, k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 213c4762a1bSJed Brown } 214c4762a1bSJed Brown if (j<n-1) { 2159566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j+1,k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 2169566063dSJacob Faibussowitsch J = global_index(i, j+1,k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2179566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j+1,k ,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 218c4762a1bSJed Brown } 219c4762a1bSJed Brown } 220c4762a1bSJed Brown if (k<o-1) { 221c4762a1bSJed Brown if (j>0) { 2229566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j-1,k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 2239566063dSJacob Faibussowitsch J = global_index(i, j-1,k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2249566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 225c4762a1bSJed Brown } 226c4762a1bSJed Brown { 2279566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j, k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 2289566063dSJacob Faibussowitsch J = global_index(i, j, k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2299566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j, k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 230c4762a1bSJed Brown } 231c4762a1bSJed Brown if (j<n-1) { 2329566063dSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j+1,k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 2339566063dSJacob Faibussowitsch J = global_index(i, j+1,k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 2349566063dSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 235c4762a1bSJed Brown } 236c4762a1bSJed Brown } 2379566063dSJacob Faibussowitsch v = 26.0; PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 238c4762a1bSJed Brown } 239c4762a1bSJed Brown } 2409566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 2419566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 242c4762a1bSJed Brown 243c4762a1bSJed Brown /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ 2449566063dSJacob Faibussowitsch PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 245c4762a1bSJed Brown 2469566063dSJacob Faibussowitsch PetscCall(PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage)); 247c4762a1bSJed Brown 248c4762a1bSJed Brown /* Test C = A*B */ 2499566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(fullMatMatMultStage)); 2509566063dSJacob Faibussowitsch PetscCall(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C)); 251c4762a1bSJed Brown 252c4762a1bSJed Brown /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ 2539566063dSJacob Faibussowitsch PetscCall(MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP)); 2549566063dSJacob Faibussowitsch PetscCall(MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy)); 2559566063dSJacob Faibussowitsch PetscCall(MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared)); 256c4762a1bSJed Brown 2579566063dSJacob Faibussowitsch PetscCall(MatView(C,PETSC_VIEWER_STDOUT_WORLD)); 2589566063dSJacob Faibussowitsch PetscCall(MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD)); 259c4762a1bSJed Brown 2609566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP_squared)); 2619566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP_copy)); 2629566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP)); 2639566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C)); 2649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 2659566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 2669566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 267b122ec5aSJacob Faibussowitsch return 0; 268c4762a1bSJed Brown } 269c4762a1bSJed Brown 270c4762a1bSJed Brown /*TEST 271c4762a1bSJed Brown 272c4762a1bSJed Brown test: 273c4762a1bSJed Brown suffix: 1 274c4762a1bSJed Brown nsize: 1 275c20d7725SJed Brown args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted 276c4762a1bSJed Brown 277c4762a1bSJed Brown test: 278c4762a1bSJed Brown suffix: 2 279c4762a1bSJed Brown nsize: 1 280c4762a1bSJed Brown args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge 281c4762a1bSJed Brown 282c4762a1bSJed Brown test: 283c4762a1bSJed Brown suffix: 3 284c4762a1bSJed Brown nsize: 4 285c4762a1bSJed Brown args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi 286c4762a1bSJed Brown 287c4762a1bSJed Brown TEST*/ 288