1c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscmat.h> 4c4762a1bSJed Brown 5c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */ 6*d71ae5a4SJacob Faibussowitsch int global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n) 7*d71ae5a4SJacob Faibussowitsch { 89371c9d4SSatish Balay return i + j * m + k * m * n; 99371c9d4SSatish Balay } 10c4762a1bSJed Brown 11*d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv) 12*d71ae5a4SJacob Faibussowitsch { 13c4762a1bSJed Brown Mat A, B, C, PtAP, PtAP_copy, PtAP_squared; 14c4762a1bSJed Brown PetscInt i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1; 15c4762a1bSJed Brown PetscScalar v; 16c4762a1bSJed Brown PetscBool equal = PETSC_FALSE, mat_view = PETSC_FALSE; 17c4762a1bSJed Brown char stencil[PETSC_MAX_PATH_LEN]; 18c4762a1bSJed Brown #if defined(PETSC_USE_LOG) 19c4762a1bSJed Brown PetscLogStage fullMatMatMultStage; 20c4762a1bSJed Brown #endif 21c4762a1bSJed Brown 22327415f7SBarry Smith PetscFunctionBeginUser; 239566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 249566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL)); 259566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); 269566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL)); 279566063dSJacob Faibussowitsch PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view)); 289566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL)); 29c4762a1bSJed Brown 30c4762a1bSJed Brown /* Create a aij matrix A */ 31c4762a1bSJed Brown M = N = m * n * o; 329566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 339566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N)); 349566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATAIJ)); 359566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 36c4762a1bSJed Brown 37c4762a1bSJed Brown /* Consistency checks */ 38bc30f867SBarry Smith PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!"); 39c4762a1bSJed Brown 40c4762a1bSJed Brown /************ 2D stencils ***************/ 419566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d5point", &equal)); 42c4762a1bSJed Brown if (equal) { /* 5-point stencil, 2D */ 439566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL)); 449566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL)); 459566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 46c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 479371c9d4SSatish Balay v = -1.0; 489371c9d4SSatish Balay k = Ii / (m * n); 499371c9d4SSatish Balay j = (Ii - k * m * n) / m; 509371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 519371c9d4SSatish Balay if (i > 0) { 529371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 539371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 549371c9d4SSatish Balay } 559371c9d4SSatish Balay if (i < m - 1) { 569371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 579371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 589371c9d4SSatish Balay } 599371c9d4SSatish Balay if (j > 0) { 609371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 619371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 629371c9d4SSatish Balay } 639371c9d4SSatish Balay if (j < n - 1) { 649371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 659371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 669371c9d4SSatish Balay } 679371c9d4SSatish Balay v = 4.0; 689371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 69c4762a1bSJed Brown } 70c4762a1bSJed Brown } 719566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d9point", &equal)); 72c4762a1bSJed Brown if (equal) { /* 9-point stencil, 2D */ 739566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL)); 749566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL)); 759566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 76c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 779371c9d4SSatish Balay v = -1.0; 789371c9d4SSatish Balay k = Ii / (m * n); 799371c9d4SSatish Balay j = (Ii - k * m * n) / m; 809371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 819371c9d4SSatish Balay if (i > 0) { 829371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 839371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 849371c9d4SSatish Balay } 859371c9d4SSatish Balay if (i > 0 && j > 0) { 869371c9d4SSatish Balay J = global_index(i - 1, j - 1, k, m, n); 879371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 889371c9d4SSatish Balay } 899371c9d4SSatish Balay if (j > 0) { 909371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 919371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 929371c9d4SSatish Balay } 939371c9d4SSatish Balay if (i < m - 1 && j > 0) { 949371c9d4SSatish Balay J = global_index(i + 1, j - 1, k, m, n); 959371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 969371c9d4SSatish Balay } 979371c9d4SSatish Balay if (i < m - 1) { 989371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 999371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1009371c9d4SSatish Balay } 1019371c9d4SSatish Balay if (i < m - 1 && j < n - 1) { 1029371c9d4SSatish Balay J = global_index(i + 1, j + 1, k, m, n); 1039371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1049371c9d4SSatish Balay } 1059371c9d4SSatish Balay if (j < n - 1) { 1069371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 1079371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1089371c9d4SSatish Balay } 1099371c9d4SSatish Balay if (i > 0 && j < n - 1) { 1109371c9d4SSatish Balay J = global_index(i - 1, j + 1, k, m, n); 1119371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1129371c9d4SSatish Balay } 1139371c9d4SSatish Balay v = 8.0; 1149371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 115c4762a1bSJed Brown } 116c4762a1bSJed Brown } 1179566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d9point2", &equal)); 118c4762a1bSJed Brown if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ 1199566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL)); 1209566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL)); 1219566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 122c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 1239371c9d4SSatish Balay v = -1.0; 1249371c9d4SSatish Balay k = Ii / (m * n); 1259371c9d4SSatish Balay j = (Ii - k * m * n) / m; 1269371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 1279371c9d4SSatish Balay if (i > 0) { 1289371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 1299371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1309371c9d4SSatish Balay } 1319371c9d4SSatish Balay if (i > 1) { 1329371c9d4SSatish Balay J = global_index(i - 2, j, k, m, n); 1339371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1349371c9d4SSatish Balay } 1359371c9d4SSatish Balay if (i < m - 1) { 1369371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 1379371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1389371c9d4SSatish Balay } 1399371c9d4SSatish Balay if (i < m - 2) { 1409371c9d4SSatish Balay J = global_index(i + 2, j, k, m, n); 1419371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1429371c9d4SSatish Balay } 1439371c9d4SSatish Balay if (j > 0) { 1449371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 1459371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1469371c9d4SSatish Balay } 1479371c9d4SSatish Balay if (j > 1) { 1489371c9d4SSatish Balay J = global_index(i, j - 2, k, m, n); 1499371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1509371c9d4SSatish Balay } 1519371c9d4SSatish Balay if (j < n - 1) { 1529371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 1539371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1549371c9d4SSatish Balay } 1559371c9d4SSatish Balay if (j < n - 2) { 1569371c9d4SSatish Balay J = global_index(i, j + 2, k, m, n); 1579371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1589371c9d4SSatish Balay } 1599371c9d4SSatish Balay v = 8.0; 1609371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 161c4762a1bSJed Brown } 162c4762a1bSJed Brown } 1639566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d13point", &equal)); 164c4762a1bSJed Brown if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ 1659566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL)); 1669566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL)); 1679566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 168c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 1699371c9d4SSatish Balay v = -1.0; 1709371c9d4SSatish Balay k = Ii / (m * n); 1719371c9d4SSatish Balay j = (Ii - k * m * n) / m; 1729371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 1739371c9d4SSatish Balay if (i > 0) { 1749371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 1759371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1769371c9d4SSatish Balay } 1779371c9d4SSatish Balay if (i > 1) { 1789371c9d4SSatish Balay J = global_index(i - 2, j, k, m, n); 1799371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1809371c9d4SSatish Balay } 1819371c9d4SSatish Balay if (i > 2) { 1829371c9d4SSatish Balay J = global_index(i - 3, j, k, m, n); 1839371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1849371c9d4SSatish Balay } 1859371c9d4SSatish Balay if (i < m - 1) { 1869371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 1879371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1889371c9d4SSatish Balay } 1899371c9d4SSatish Balay if (i < m - 2) { 1909371c9d4SSatish Balay J = global_index(i + 2, j, k, m, n); 1919371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1929371c9d4SSatish Balay } 1939371c9d4SSatish Balay if (i < m - 3) { 1949371c9d4SSatish Balay J = global_index(i + 3, j, k, m, n); 1959371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1969371c9d4SSatish Balay } 1979371c9d4SSatish Balay if (j > 0) { 1989371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 1999371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2009371c9d4SSatish Balay } 2019371c9d4SSatish Balay if (j > 1) { 2029371c9d4SSatish Balay J = global_index(i, j - 2, k, m, n); 2039371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2049371c9d4SSatish Balay } 2059371c9d4SSatish Balay if (j > 2) { 2069371c9d4SSatish Balay J = global_index(i, j - 3, k, m, n); 2079371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2089371c9d4SSatish Balay } 2099371c9d4SSatish Balay if (j < n - 1) { 2109371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 2119371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2129371c9d4SSatish Balay } 2139371c9d4SSatish Balay if (j < n - 2) { 2149371c9d4SSatish Balay J = global_index(i, j + 2, k, m, n); 2159371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2169371c9d4SSatish Balay } 2179371c9d4SSatish Balay if (j < n - 3) { 2189371c9d4SSatish Balay J = global_index(i, j + 3, k, m, n); 2199371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2209371c9d4SSatish Balay } 2219371c9d4SSatish Balay v = 12.0; 2229371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 223c4762a1bSJed Brown } 224c4762a1bSJed Brown } 225c4762a1bSJed Brown /************ 3D stencils ***************/ 2269566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d7point", &equal)); 227c4762a1bSJed Brown if (equal) { /* 7-point stencil, 3D */ 2289566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL)); 2299566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL)); 2309566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 231c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 2329371c9d4SSatish Balay v = -1.0; 2339371c9d4SSatish Balay k = Ii / (m * n); 2349371c9d4SSatish Balay j = (Ii - k * m * n) / m; 2359371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 2369371c9d4SSatish Balay if (i > 0) { 2379371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 2389371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2399371c9d4SSatish Balay } 2409371c9d4SSatish Balay if (i < m - 1) { 2419371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 2429371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2439371c9d4SSatish Balay } 2449371c9d4SSatish Balay if (j > 0) { 2459371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 2469371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2479371c9d4SSatish Balay } 2489371c9d4SSatish Balay if (j < n - 1) { 2499371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 2509371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2519371c9d4SSatish Balay } 2529371c9d4SSatish Balay if (k > 0) { 2539371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n); 2549371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2559371c9d4SSatish Balay } 2569371c9d4SSatish Balay if (k < o - 1) { 2579371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n); 2589371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2599371c9d4SSatish Balay } 2609371c9d4SSatish Balay v = 6.0; 2619371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 262c4762a1bSJed Brown } 263c4762a1bSJed Brown } 2649566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d13point", &equal)); 265c4762a1bSJed Brown if (equal) { /* 13-point stencil, 3D */ 2669566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL)); 2679566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL)); 2689566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 269c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 2709371c9d4SSatish Balay v = -1.0; 2719371c9d4SSatish Balay k = Ii / (m * n); 2729371c9d4SSatish Balay j = (Ii - k * m * n) / m; 2739371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 2749371c9d4SSatish Balay if (i > 0) { 2759371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 2769371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2779371c9d4SSatish Balay } 2789371c9d4SSatish Balay if (i > 1) { 2799371c9d4SSatish Balay J = global_index(i - 2, j, k, m, n); 2809371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2819371c9d4SSatish Balay } 2829371c9d4SSatish Balay if (i < m - 1) { 2839371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 2849371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2859371c9d4SSatish Balay } 2869371c9d4SSatish Balay if (i < m - 2) { 2879371c9d4SSatish Balay J = global_index(i + 2, j, k, m, n); 2889371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2899371c9d4SSatish Balay } 2909371c9d4SSatish Balay if (j > 0) { 2919371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 2929371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2939371c9d4SSatish Balay } 2949371c9d4SSatish Balay if (j > 1) { 2959371c9d4SSatish Balay J = global_index(i, j - 2, k, m, n); 2969371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2979371c9d4SSatish Balay } 2989371c9d4SSatish Balay if (j < n - 1) { 2999371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 3009371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3019371c9d4SSatish Balay } 3029371c9d4SSatish Balay if (j < n - 2) { 3039371c9d4SSatish Balay J = global_index(i, j + 2, k, m, n); 3049371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3059371c9d4SSatish Balay } 3069371c9d4SSatish Balay if (k > 0) { 3079371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n); 3089371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3099371c9d4SSatish Balay } 3109371c9d4SSatish Balay if (k > 1) { 3119371c9d4SSatish Balay J = global_index(i, j, k - 2, m, n); 3129371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3139371c9d4SSatish Balay } 3149371c9d4SSatish Balay if (k < o - 1) { 3159371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n); 3169371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3179371c9d4SSatish Balay } 3189371c9d4SSatish Balay if (k < o - 2) { 3199371c9d4SSatish Balay J = global_index(i, j, k + 2, m, n); 3209371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3219371c9d4SSatish Balay } 3229371c9d4SSatish Balay v = 12.0; 3239371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 324c4762a1bSJed Brown } 325c4762a1bSJed Brown } 3269566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d19point", &equal)); 327c4762a1bSJed Brown if (equal) { /* 19-point stencil, 3D */ 3289566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL)); 3299566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL)); 3309566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 331c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 3329371c9d4SSatish Balay v = -1.0; 3339371c9d4SSatish Balay k = Ii / (m * n); 3349371c9d4SSatish Balay j = (Ii - k * m * n) / m; 3359371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 336c4762a1bSJed Brown /* one hop */ 3379371c9d4SSatish Balay if (i > 0) { 3389371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 3399371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3409371c9d4SSatish Balay } 3419371c9d4SSatish Balay if (i < m - 1) { 3429371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 3439371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3449371c9d4SSatish Balay } 3459371c9d4SSatish Balay if (j > 0) { 3469371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 3479371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3489371c9d4SSatish Balay } 3499371c9d4SSatish Balay if (j < n - 1) { 3509371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 3519371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3529371c9d4SSatish Balay } 3539371c9d4SSatish Balay if (k > 0) { 3549371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n); 3559371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3569371c9d4SSatish Balay } 3579371c9d4SSatish Balay if (k < o - 1) { 3589371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n); 3599371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3609371c9d4SSatish Balay } 361c4762a1bSJed Brown /* two hops */ 3629371c9d4SSatish Balay if (i > 0 && j > 0) { 3639371c9d4SSatish Balay J = global_index(i - 1, j - 1, k, m, n); 3649371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3659371c9d4SSatish Balay } 3669371c9d4SSatish Balay if (i > 0 && k > 0) { 3679371c9d4SSatish Balay J = global_index(i - 1, j, k - 1, m, n); 3689371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3699371c9d4SSatish Balay } 3709371c9d4SSatish Balay if (i > 0 && j < n - 1) { 3719371c9d4SSatish Balay J = global_index(i - 1, j + 1, k, m, n); 3729371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3739371c9d4SSatish Balay } 3749371c9d4SSatish Balay if (i > 0 && k < o - 1) { 3759371c9d4SSatish Balay J = global_index(i - 1, j, k + 1, m, n); 3769371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3779371c9d4SSatish Balay } 3789371c9d4SSatish Balay if (i < m - 1 && j > 0) { 3799371c9d4SSatish Balay J = global_index(i + 1, j - 1, k, m, n); 3809371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3819371c9d4SSatish Balay } 3829371c9d4SSatish Balay if (i < m - 1 && k > 0) { 3839371c9d4SSatish Balay J = global_index(i + 1, j, k - 1, m, n); 3849371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3859371c9d4SSatish Balay } 3869371c9d4SSatish Balay if (i < m - 1 && j < n - 1) { 3879371c9d4SSatish Balay J = global_index(i + 1, j + 1, k, m, n); 3889371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3899371c9d4SSatish Balay } 3909371c9d4SSatish Balay if (i < m - 1 && k < o - 1) { 3919371c9d4SSatish Balay J = global_index(i + 1, j, k + 1, m, n); 3929371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3939371c9d4SSatish Balay } 3949371c9d4SSatish Balay if (j > 0 && k > 0) { 3959371c9d4SSatish Balay J = global_index(i, j - 1, k - 1, m, n); 3969371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3979371c9d4SSatish Balay } 3989371c9d4SSatish Balay if (j > 0 && k < o - 1) { 3999371c9d4SSatish Balay J = global_index(i, j - 1, k + 1, m, n); 4009371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4019371c9d4SSatish Balay } 4029371c9d4SSatish Balay if (j < n - 1 && k > 0) { 4039371c9d4SSatish Balay J = global_index(i, j + 1, k - 1, m, n); 4049371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4059371c9d4SSatish Balay } 4069371c9d4SSatish Balay if (j < n - 1 && k < o - 1) { 4079371c9d4SSatish Balay J = global_index(i, j + 1, k + 1, m, n); 4089371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4099371c9d4SSatish Balay } 4109371c9d4SSatish Balay v = 18.0; 4119371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 412c4762a1bSJed Brown } 413c4762a1bSJed Brown } 4149566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d27point", &equal)); 415c4762a1bSJed Brown if (equal) { /* 27-point stencil, 3D */ 4169566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL)); 4179566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL)); 4189566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 419c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 4209371c9d4SSatish Balay v = -1.0; 4219371c9d4SSatish Balay k = Ii / (m * n); 4229371c9d4SSatish Balay j = (Ii - k * m * n) / m; 4239371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 424c4762a1bSJed Brown if (k > 0) { 425c4762a1bSJed Brown if (j > 0) { 4269371c9d4SSatish Balay if (i > 0) { 4279371c9d4SSatish Balay J = global_index(i - 1, j - 1, k - 1, m, n); 4289371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4299371c9d4SSatish Balay } 4309371c9d4SSatish Balay J = global_index(i, j - 1, k - 1, m, n); 4319371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4329371c9d4SSatish Balay if (i < m - 1) { 4339371c9d4SSatish Balay J = global_index(i + 1, j - 1, k - 1, m, n); 4349371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4359371c9d4SSatish Balay } 436c4762a1bSJed Brown } 437c4762a1bSJed Brown { 4389371c9d4SSatish Balay if (i > 0) { 4399371c9d4SSatish Balay J = global_index(i - 1, j, k - 1, m, n); 4409371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4419371c9d4SSatish Balay } 4429371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n); 4439371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4449371c9d4SSatish Balay if (i < m - 1) { 4459371c9d4SSatish Balay J = global_index(i + 1, j, k - 1, m, n); 4469371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4479371c9d4SSatish Balay } 448c4762a1bSJed Brown } 449c4762a1bSJed Brown if (j < n - 1) { 4509371c9d4SSatish Balay if (i > 0) { 4519371c9d4SSatish Balay J = global_index(i - 1, j + 1, k - 1, m, n); 4529371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4539371c9d4SSatish Balay } 4549371c9d4SSatish Balay J = global_index(i, j + 1, k - 1, m, n); 4559371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4569371c9d4SSatish Balay if (i < m - 1) { 4579371c9d4SSatish Balay J = global_index(i + 1, j + 1, k - 1, m, n); 4589371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4599371c9d4SSatish Balay } 460c4762a1bSJed Brown } 461c4762a1bSJed Brown } 462c4762a1bSJed Brown { 463c4762a1bSJed Brown if (j > 0) { 4649371c9d4SSatish Balay if (i > 0) { 4659371c9d4SSatish Balay J = global_index(i - 1, j - 1, k, m, n); 4669371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4679371c9d4SSatish Balay } 4689371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 4699371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4709371c9d4SSatish Balay if (i < m - 1) { 4719371c9d4SSatish Balay J = global_index(i + 1, j - 1, k, m, n); 4729371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4739371c9d4SSatish Balay } 474c4762a1bSJed Brown } 475c4762a1bSJed Brown { 4769371c9d4SSatish Balay if (i > 0) { 4779371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 4789371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4799371c9d4SSatish Balay } 4809371c9d4SSatish Balay J = global_index(i, j, k, m, n); 4819371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4829371c9d4SSatish Balay if (i < m - 1) { 4839371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 4849371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4859371c9d4SSatish Balay } 486c4762a1bSJed Brown } 487c4762a1bSJed Brown if (j < n - 1) { 4889371c9d4SSatish Balay if (i > 0) { 4899371c9d4SSatish Balay J = global_index(i - 1, j + 1, k, m, n); 4909371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4919371c9d4SSatish Balay } 4929371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 4939371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4949371c9d4SSatish Balay if (i < m - 1) { 4959371c9d4SSatish Balay J = global_index(i + 1, j + 1, k, m, n); 4969371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4979371c9d4SSatish Balay } 498c4762a1bSJed Brown } 499c4762a1bSJed Brown } 500c4762a1bSJed Brown if (k < o - 1) { 501c4762a1bSJed Brown if (j > 0) { 5029371c9d4SSatish Balay if (i > 0) { 5039371c9d4SSatish Balay J = global_index(i - 1, j - 1, k + 1, m, n); 5049371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5059371c9d4SSatish Balay } 5069371c9d4SSatish Balay J = global_index(i, j - 1, k + 1, m, n); 5079371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5089371c9d4SSatish Balay if (i < m - 1) { 5099371c9d4SSatish Balay J = global_index(i + 1, j - 1, k + 1, m, n); 5109371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5119371c9d4SSatish Balay } 512c4762a1bSJed Brown } 513c4762a1bSJed Brown { 5149371c9d4SSatish Balay if (i > 0) { 5159371c9d4SSatish Balay J = global_index(i - 1, j, k + 1, m, n); 5169371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5179371c9d4SSatish Balay } 5189371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n); 5199371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5209371c9d4SSatish Balay if (i < m - 1) { 5219371c9d4SSatish Balay J = global_index(i + 1, j, k + 1, m, n); 5229371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5239371c9d4SSatish Balay } 524c4762a1bSJed Brown } 525c4762a1bSJed Brown if (j < n - 1) { 5269371c9d4SSatish Balay if (i > 0) { 5279371c9d4SSatish Balay J = global_index(i - 1, j + 1, k + 1, m, n); 5289371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5299371c9d4SSatish Balay } 5309371c9d4SSatish Balay J = global_index(i, j + 1, k + 1, m, n); 5319371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5329371c9d4SSatish Balay if (i < m - 1) { 5339371c9d4SSatish Balay J = global_index(i + 1, j + 1, k + 1, m, n); 5349371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 535c4762a1bSJed Brown } 536c4762a1bSJed Brown } 5379371c9d4SSatish Balay } 5389371c9d4SSatish Balay v = 26.0; 5399371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 540c4762a1bSJed Brown } 541c4762a1bSJed Brown } 5429566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 5439566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 544c4762a1bSJed Brown 545c4762a1bSJed Brown /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ 5469566063dSJacob Faibussowitsch PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 547c4762a1bSJed Brown 5489566063dSJacob Faibussowitsch PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage)); 549c4762a1bSJed Brown 550c4762a1bSJed Brown /* Test C = A*B */ 5519566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(fullMatMatMultStage)); 5529566063dSJacob Faibussowitsch PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); 553c4762a1bSJed Brown 554c4762a1bSJed Brown /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ 5559566063dSJacob Faibussowitsch PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP)); 5569566063dSJacob Faibussowitsch PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy)); 5579566063dSJacob Faibussowitsch PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP_squared)); 558c4762a1bSJed Brown 5599566063dSJacob Faibussowitsch PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD)); 5609566063dSJacob Faibussowitsch PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD)); 561c4762a1bSJed Brown 5629566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP_squared)); 5639566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP_copy)); 5649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP)); 5659566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C)); 5669566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 5679566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 5689566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 569b122ec5aSJacob Faibussowitsch return 0; 570c4762a1bSJed Brown } 571c4762a1bSJed Brown 572c4762a1bSJed Brown /*TEST 573c4762a1bSJed Brown 574c4762a1bSJed Brown test: 575c4762a1bSJed Brown suffix: 1 576c4762a1bSJed Brown nsize: 1 577c20d7725SJed Brown args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted 578c4762a1bSJed Brown 579c4762a1bSJed Brown test: 580c4762a1bSJed Brown suffix: 2 581c4762a1bSJed Brown nsize: 1 582c4762a1bSJed Brown args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge 583c4762a1bSJed Brown 584c4762a1bSJed Brown test: 585c4762a1bSJed Brown suffix: 3 586c4762a1bSJed Brown nsize: 4 587c4762a1bSJed Brown args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi 588c4762a1bSJed Brown 589c4762a1bSJed Brown TEST*/ 590