1c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscmat.h> 4c4762a1bSJed Brown 5c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */ 66497c311SBarry Smith PetscInt global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n) 7d71ae5a4SJacob Faibussowitsch { 89371c9d4SSatish Balay return i + j * m + k * m * n; 99371c9d4SSatish Balay } 10c4762a1bSJed Brown 11d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv) 12d71ae5a4SJacob Faibussowitsch { 13c4762a1bSJed Brown Mat A, B, C, PtAP, PtAP_copy, PtAP_squared; 14c4762a1bSJed Brown PetscInt i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1; 15c4762a1bSJed Brown PetscScalar v; 16c4762a1bSJed Brown PetscBool equal = PETSC_FALSE, mat_view = PETSC_FALSE; 17c4762a1bSJed Brown char stencil[PETSC_MAX_PATH_LEN]; 18c4762a1bSJed Brown PetscLogStage fullMatMatMultStage; 19c4762a1bSJed Brown 20327415f7SBarry Smith PetscFunctionBeginUser; 21*c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 229566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL)); 239566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); 249566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL)); 259566063dSJacob Faibussowitsch PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view)); 269566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL)); 27c4762a1bSJed Brown 28c4762a1bSJed Brown /* Create a aij matrix A */ 29c4762a1bSJed Brown M = N = m * n * o; 309566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 319566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N)); 329566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATAIJ)); 339566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 34c4762a1bSJed Brown 35c4762a1bSJed Brown /* Consistency checks */ 36bc30f867SBarry Smith PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!"); 37c4762a1bSJed Brown 38c4762a1bSJed Brown /************ 2D stencils ***************/ 399566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d5point", &equal)); 40c4762a1bSJed Brown if (equal) { /* 5-point stencil, 2D */ 419566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL)); 429566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL)); 439566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 44c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 459371c9d4SSatish Balay v = -1.0; 469371c9d4SSatish Balay k = Ii / (m * n); 479371c9d4SSatish Balay j = (Ii - k * m * n) / m; 489371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 499371c9d4SSatish Balay if (i > 0) { 509371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 519371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 529371c9d4SSatish Balay } 539371c9d4SSatish Balay if (i < m - 1) { 549371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 559371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 569371c9d4SSatish Balay } 579371c9d4SSatish Balay if (j > 0) { 589371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 599371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 609371c9d4SSatish Balay } 619371c9d4SSatish Balay if (j < n - 1) { 629371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 639371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 649371c9d4SSatish Balay } 659371c9d4SSatish Balay v = 4.0; 669371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 67c4762a1bSJed Brown } 68c4762a1bSJed Brown } 699566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d9point", &equal)); 70c4762a1bSJed Brown if (equal) { /* 9-point stencil, 2D */ 719566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL)); 729566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL)); 739566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 74c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 759371c9d4SSatish Balay v = -1.0; 769371c9d4SSatish Balay k = Ii / (m * n); 779371c9d4SSatish Balay j = (Ii - k * m * n) / m; 789371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 799371c9d4SSatish Balay if (i > 0) { 809371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 819371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 829371c9d4SSatish Balay } 839371c9d4SSatish Balay if (i > 0 && j > 0) { 849371c9d4SSatish Balay J = global_index(i - 1, j - 1, k, m, n); 859371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 869371c9d4SSatish Balay } 879371c9d4SSatish Balay if (j > 0) { 889371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 899371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 909371c9d4SSatish Balay } 919371c9d4SSatish Balay if (i < m - 1 && j > 0) { 929371c9d4SSatish Balay J = global_index(i + 1, j - 1, k, m, n); 939371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 949371c9d4SSatish Balay } 959371c9d4SSatish Balay if (i < m - 1) { 969371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 979371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 989371c9d4SSatish Balay } 999371c9d4SSatish Balay if (i < m - 1 && j < n - 1) { 1009371c9d4SSatish Balay J = global_index(i + 1, j + 1, k, m, n); 1019371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1029371c9d4SSatish Balay } 1039371c9d4SSatish Balay if (j < n - 1) { 1049371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 1059371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1069371c9d4SSatish Balay } 1079371c9d4SSatish Balay if (i > 0 && j < n - 1) { 1089371c9d4SSatish Balay J = global_index(i - 1, j + 1, k, m, n); 1099371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1109371c9d4SSatish Balay } 1119371c9d4SSatish Balay v = 8.0; 1129371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 113c4762a1bSJed Brown } 114c4762a1bSJed Brown } 1159566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d9point2", &equal)); 116c4762a1bSJed Brown if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ 1179566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL)); 1189566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL)); 1199566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 120c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 1219371c9d4SSatish Balay v = -1.0; 1229371c9d4SSatish Balay k = Ii / (m * n); 1239371c9d4SSatish Balay j = (Ii - k * m * n) / m; 1249371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 1259371c9d4SSatish Balay if (i > 0) { 1269371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 1279371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1289371c9d4SSatish Balay } 1299371c9d4SSatish Balay if (i > 1) { 1309371c9d4SSatish Balay J = global_index(i - 2, j, k, m, n); 1319371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1329371c9d4SSatish Balay } 1339371c9d4SSatish Balay if (i < m - 1) { 1349371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 1359371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1369371c9d4SSatish Balay } 1379371c9d4SSatish Balay if (i < m - 2) { 1389371c9d4SSatish Balay J = global_index(i + 2, j, k, m, n); 1399371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1409371c9d4SSatish Balay } 1419371c9d4SSatish Balay if (j > 0) { 1429371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 1439371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1449371c9d4SSatish Balay } 1459371c9d4SSatish Balay if (j > 1) { 1469371c9d4SSatish Balay J = global_index(i, j - 2, k, m, n); 1479371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1489371c9d4SSatish Balay } 1499371c9d4SSatish Balay if (j < n - 1) { 1509371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 1519371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1529371c9d4SSatish Balay } 1539371c9d4SSatish Balay if (j < n - 2) { 1549371c9d4SSatish Balay J = global_index(i, j + 2, k, m, n); 1559371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1569371c9d4SSatish Balay } 1579371c9d4SSatish Balay v = 8.0; 1589371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 159c4762a1bSJed Brown } 160c4762a1bSJed Brown } 1619566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d13point", &equal)); 162c4762a1bSJed Brown if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ 1639566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL)); 1649566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL)); 1659566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 166c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 1679371c9d4SSatish Balay v = -1.0; 1689371c9d4SSatish Balay k = Ii / (m * n); 1699371c9d4SSatish Balay j = (Ii - k * m * n) / m; 1709371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 1719371c9d4SSatish Balay if (i > 0) { 1729371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 1739371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1749371c9d4SSatish Balay } 1759371c9d4SSatish Balay if (i > 1) { 1769371c9d4SSatish Balay J = global_index(i - 2, j, k, m, n); 1779371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1789371c9d4SSatish Balay } 1799371c9d4SSatish Balay if (i > 2) { 1809371c9d4SSatish Balay J = global_index(i - 3, j, k, m, n); 1819371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1829371c9d4SSatish Balay } 1839371c9d4SSatish Balay if (i < m - 1) { 1849371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 1859371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1869371c9d4SSatish Balay } 1879371c9d4SSatish Balay if (i < m - 2) { 1889371c9d4SSatish Balay J = global_index(i + 2, j, k, m, n); 1899371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1909371c9d4SSatish Balay } 1919371c9d4SSatish Balay if (i < m - 3) { 1929371c9d4SSatish Balay J = global_index(i + 3, j, k, m, n); 1939371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1949371c9d4SSatish Balay } 1959371c9d4SSatish Balay if (j > 0) { 1969371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 1979371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 1989371c9d4SSatish Balay } 1999371c9d4SSatish Balay if (j > 1) { 2009371c9d4SSatish Balay J = global_index(i, j - 2, k, m, n); 2019371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2029371c9d4SSatish Balay } 2039371c9d4SSatish Balay if (j > 2) { 2049371c9d4SSatish Balay J = global_index(i, j - 3, k, m, n); 2059371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2069371c9d4SSatish Balay } 2079371c9d4SSatish Balay if (j < n - 1) { 2089371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 2099371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2109371c9d4SSatish Balay } 2119371c9d4SSatish Balay if (j < n - 2) { 2129371c9d4SSatish Balay J = global_index(i, j + 2, k, m, n); 2139371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2149371c9d4SSatish Balay } 2159371c9d4SSatish Balay if (j < n - 3) { 2169371c9d4SSatish Balay J = global_index(i, j + 3, k, m, n); 2179371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2189371c9d4SSatish Balay } 2199371c9d4SSatish Balay v = 12.0; 2209371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 221c4762a1bSJed Brown } 222c4762a1bSJed Brown } 223c4762a1bSJed Brown /************ 3D stencils ***************/ 2249566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d7point", &equal)); 225c4762a1bSJed Brown if (equal) { /* 7-point stencil, 3D */ 2269566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL)); 2279566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL)); 2289566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 229c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 2309371c9d4SSatish Balay v = -1.0; 2319371c9d4SSatish Balay k = Ii / (m * n); 2329371c9d4SSatish Balay j = (Ii - k * m * n) / m; 2339371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 2349371c9d4SSatish Balay if (i > 0) { 2359371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 2369371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2379371c9d4SSatish Balay } 2389371c9d4SSatish Balay if (i < m - 1) { 2399371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 2409371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2419371c9d4SSatish Balay } 2429371c9d4SSatish Balay if (j > 0) { 2439371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 2449371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2459371c9d4SSatish Balay } 2469371c9d4SSatish Balay if (j < n - 1) { 2479371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 2489371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2499371c9d4SSatish Balay } 2509371c9d4SSatish Balay if (k > 0) { 2519371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n); 2529371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2539371c9d4SSatish Balay } 2549371c9d4SSatish Balay if (k < o - 1) { 2559371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n); 2569371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2579371c9d4SSatish Balay } 2589371c9d4SSatish Balay v = 6.0; 2599371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 260c4762a1bSJed Brown } 261c4762a1bSJed Brown } 2629566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d13point", &equal)); 263c4762a1bSJed Brown if (equal) { /* 13-point stencil, 3D */ 2649566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL)); 2659566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL)); 2669566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 267c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 2689371c9d4SSatish Balay v = -1.0; 2699371c9d4SSatish Balay k = Ii / (m * n); 2709371c9d4SSatish Balay j = (Ii - k * m * n) / m; 2719371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 2729371c9d4SSatish Balay if (i > 0) { 2739371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 2749371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2759371c9d4SSatish Balay } 2769371c9d4SSatish Balay if (i > 1) { 2779371c9d4SSatish Balay J = global_index(i - 2, j, k, m, n); 2789371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2799371c9d4SSatish Balay } 2809371c9d4SSatish Balay if (i < m - 1) { 2819371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 2829371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2839371c9d4SSatish Balay } 2849371c9d4SSatish Balay if (i < m - 2) { 2859371c9d4SSatish Balay J = global_index(i + 2, j, k, m, n); 2869371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2879371c9d4SSatish Balay } 2889371c9d4SSatish Balay if (j > 0) { 2899371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 2909371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2919371c9d4SSatish Balay } 2929371c9d4SSatish Balay if (j > 1) { 2939371c9d4SSatish Balay J = global_index(i, j - 2, k, m, n); 2949371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2959371c9d4SSatish Balay } 2969371c9d4SSatish Balay if (j < n - 1) { 2979371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 2989371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 2999371c9d4SSatish Balay } 3009371c9d4SSatish Balay if (j < n - 2) { 3019371c9d4SSatish Balay J = global_index(i, j + 2, k, m, n); 3029371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3039371c9d4SSatish Balay } 3049371c9d4SSatish Balay if (k > 0) { 3059371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n); 3069371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3079371c9d4SSatish Balay } 3089371c9d4SSatish Balay if (k > 1) { 3099371c9d4SSatish Balay J = global_index(i, j, k - 2, m, n); 3109371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3119371c9d4SSatish Balay } 3129371c9d4SSatish Balay if (k < o - 1) { 3139371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n); 3149371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3159371c9d4SSatish Balay } 3169371c9d4SSatish Balay if (k < o - 2) { 3179371c9d4SSatish Balay J = global_index(i, j, k + 2, m, n); 3189371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3199371c9d4SSatish Balay } 3209371c9d4SSatish Balay v = 12.0; 3219371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 322c4762a1bSJed Brown } 323c4762a1bSJed Brown } 3249566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d19point", &equal)); 325c4762a1bSJed Brown if (equal) { /* 19-point stencil, 3D */ 3269566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL)); 3279566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL)); 3289566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 329c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 3309371c9d4SSatish Balay v = -1.0; 3319371c9d4SSatish Balay k = Ii / (m * n); 3329371c9d4SSatish Balay j = (Ii - k * m * n) / m; 3339371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 334c4762a1bSJed Brown /* one hop */ 3359371c9d4SSatish Balay if (i > 0) { 3369371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 3379371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3389371c9d4SSatish Balay } 3399371c9d4SSatish Balay if (i < m - 1) { 3409371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 3419371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3429371c9d4SSatish Balay } 3439371c9d4SSatish Balay if (j > 0) { 3449371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 3459371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3469371c9d4SSatish Balay } 3479371c9d4SSatish Balay if (j < n - 1) { 3489371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 3499371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3509371c9d4SSatish Balay } 3519371c9d4SSatish Balay if (k > 0) { 3529371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n); 3539371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3549371c9d4SSatish Balay } 3559371c9d4SSatish Balay if (k < o - 1) { 3569371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n); 3579371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3589371c9d4SSatish Balay } 359c4762a1bSJed Brown /* two hops */ 3609371c9d4SSatish Balay if (i > 0 && j > 0) { 3619371c9d4SSatish Balay J = global_index(i - 1, j - 1, k, m, n); 3629371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3639371c9d4SSatish Balay } 3649371c9d4SSatish Balay if (i > 0 && k > 0) { 3659371c9d4SSatish Balay J = global_index(i - 1, j, k - 1, m, n); 3669371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3679371c9d4SSatish Balay } 3689371c9d4SSatish Balay if (i > 0 && j < n - 1) { 3699371c9d4SSatish Balay J = global_index(i - 1, j + 1, k, m, n); 3709371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3719371c9d4SSatish Balay } 3729371c9d4SSatish Balay if (i > 0 && k < o - 1) { 3739371c9d4SSatish Balay J = global_index(i - 1, j, k + 1, m, n); 3749371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3759371c9d4SSatish Balay } 3769371c9d4SSatish Balay if (i < m - 1 && j > 0) { 3779371c9d4SSatish Balay J = global_index(i + 1, j - 1, k, m, n); 3789371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3799371c9d4SSatish Balay } 3809371c9d4SSatish Balay if (i < m - 1 && k > 0) { 3819371c9d4SSatish Balay J = global_index(i + 1, j, k - 1, m, n); 3829371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3839371c9d4SSatish Balay } 3849371c9d4SSatish Balay if (i < m - 1 && j < n - 1) { 3859371c9d4SSatish Balay J = global_index(i + 1, j + 1, k, m, n); 3869371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3879371c9d4SSatish Balay } 3889371c9d4SSatish Balay if (i < m - 1 && k < o - 1) { 3899371c9d4SSatish Balay J = global_index(i + 1, j, k + 1, m, n); 3909371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3919371c9d4SSatish Balay } 3929371c9d4SSatish Balay if (j > 0 && k > 0) { 3939371c9d4SSatish Balay J = global_index(i, j - 1, k - 1, m, n); 3949371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3959371c9d4SSatish Balay } 3969371c9d4SSatish Balay if (j > 0 && k < o - 1) { 3979371c9d4SSatish Balay J = global_index(i, j - 1, k + 1, m, n); 3989371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 3999371c9d4SSatish Balay } 4009371c9d4SSatish Balay if (j < n - 1 && k > 0) { 4019371c9d4SSatish Balay J = global_index(i, j + 1, k - 1, m, n); 4029371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4039371c9d4SSatish Balay } 4049371c9d4SSatish Balay if (j < n - 1 && k < o - 1) { 4059371c9d4SSatish Balay J = global_index(i, j + 1, k + 1, m, n); 4069371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4079371c9d4SSatish Balay } 4089371c9d4SSatish Balay v = 18.0; 4099371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 410c4762a1bSJed Brown } 411c4762a1bSJed Brown } 4129566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d27point", &equal)); 413c4762a1bSJed Brown if (equal) { /* 27-point stencil, 3D */ 4149566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL)); 4159566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL)); 4169566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 417c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 4189371c9d4SSatish Balay v = -1.0; 4199371c9d4SSatish Balay k = Ii / (m * n); 4209371c9d4SSatish Balay j = (Ii - k * m * n) / m; 4219371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 422c4762a1bSJed Brown if (k > 0) { 423c4762a1bSJed Brown if (j > 0) { 4249371c9d4SSatish Balay if (i > 0) { 4259371c9d4SSatish Balay J = global_index(i - 1, j - 1, k - 1, m, n); 4269371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4279371c9d4SSatish Balay } 4289371c9d4SSatish Balay J = global_index(i, j - 1, k - 1, m, n); 4299371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4309371c9d4SSatish Balay if (i < m - 1) { 4319371c9d4SSatish Balay J = global_index(i + 1, j - 1, k - 1, m, n); 4329371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4339371c9d4SSatish Balay } 434c4762a1bSJed Brown } 435c4762a1bSJed Brown { 4369371c9d4SSatish Balay if (i > 0) { 4379371c9d4SSatish Balay J = global_index(i - 1, j, k - 1, m, n); 4389371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4399371c9d4SSatish Balay } 4409371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n); 4419371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4429371c9d4SSatish Balay if (i < m - 1) { 4439371c9d4SSatish Balay J = global_index(i + 1, j, k - 1, m, n); 4449371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4459371c9d4SSatish Balay } 446c4762a1bSJed Brown } 447c4762a1bSJed Brown if (j < n - 1) { 4489371c9d4SSatish Balay if (i > 0) { 4499371c9d4SSatish Balay J = global_index(i - 1, j + 1, k - 1, m, n); 4509371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4519371c9d4SSatish Balay } 4529371c9d4SSatish Balay J = global_index(i, j + 1, k - 1, m, n); 4539371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4549371c9d4SSatish Balay if (i < m - 1) { 4559371c9d4SSatish Balay J = global_index(i + 1, j + 1, k - 1, m, n); 4569371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4579371c9d4SSatish Balay } 458c4762a1bSJed Brown } 459c4762a1bSJed Brown } 460c4762a1bSJed Brown { 461c4762a1bSJed Brown if (j > 0) { 4629371c9d4SSatish Balay if (i > 0) { 4639371c9d4SSatish Balay J = global_index(i - 1, j - 1, k, m, n); 4649371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4659371c9d4SSatish Balay } 4669371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 4679371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4689371c9d4SSatish Balay if (i < m - 1) { 4699371c9d4SSatish Balay J = global_index(i + 1, j - 1, k, m, n); 4709371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4719371c9d4SSatish Balay } 472c4762a1bSJed Brown } 473c4762a1bSJed Brown { 4749371c9d4SSatish Balay if (i > 0) { 4759371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 4769371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4779371c9d4SSatish Balay } 4789371c9d4SSatish Balay J = global_index(i, j, k, m, n); 4799371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4809371c9d4SSatish Balay if (i < m - 1) { 4819371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 4829371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4839371c9d4SSatish Balay } 484c4762a1bSJed Brown } 485c4762a1bSJed Brown if (j < n - 1) { 4869371c9d4SSatish Balay if (i > 0) { 4879371c9d4SSatish Balay J = global_index(i - 1, j + 1, k, m, n); 4889371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4899371c9d4SSatish Balay } 4909371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 4919371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4929371c9d4SSatish Balay if (i < m - 1) { 4939371c9d4SSatish Balay J = global_index(i + 1, j + 1, k, m, n); 4949371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 4959371c9d4SSatish Balay } 496c4762a1bSJed Brown } 497c4762a1bSJed Brown } 498c4762a1bSJed Brown if (k < o - 1) { 499c4762a1bSJed Brown if (j > 0) { 5009371c9d4SSatish Balay if (i > 0) { 5019371c9d4SSatish Balay J = global_index(i - 1, j - 1, k + 1, m, n); 5029371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5039371c9d4SSatish Balay } 5049371c9d4SSatish Balay J = global_index(i, j - 1, k + 1, m, n); 5059371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5069371c9d4SSatish Balay if (i < m - 1) { 5079371c9d4SSatish Balay J = global_index(i + 1, j - 1, k + 1, m, n); 5089371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5099371c9d4SSatish Balay } 510c4762a1bSJed Brown } 511c4762a1bSJed Brown { 5129371c9d4SSatish Balay if (i > 0) { 5139371c9d4SSatish Balay J = global_index(i - 1, j, k + 1, m, n); 5149371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5159371c9d4SSatish Balay } 5169371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n); 5179371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5189371c9d4SSatish Balay if (i < m - 1) { 5199371c9d4SSatish Balay J = global_index(i + 1, j, k + 1, m, n); 5209371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5219371c9d4SSatish Balay } 522c4762a1bSJed Brown } 523c4762a1bSJed Brown if (j < n - 1) { 5249371c9d4SSatish Balay if (i > 0) { 5259371c9d4SSatish Balay J = global_index(i - 1, j + 1, k + 1, m, n); 5269371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5279371c9d4SSatish Balay } 5289371c9d4SSatish Balay J = global_index(i, j + 1, k + 1, m, n); 5299371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 5309371c9d4SSatish Balay if (i < m - 1) { 5319371c9d4SSatish Balay J = global_index(i + 1, j + 1, k + 1, m, n); 5329371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 533c4762a1bSJed Brown } 534c4762a1bSJed Brown } 5359371c9d4SSatish Balay } 5369371c9d4SSatish Balay v = 26.0; 5379371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 538c4762a1bSJed Brown } 539c4762a1bSJed Brown } 5409566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 5419566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 542c4762a1bSJed Brown 543c4762a1bSJed Brown /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ 5449566063dSJacob Faibussowitsch PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 545c4762a1bSJed Brown 5469566063dSJacob Faibussowitsch PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage)); 547c4762a1bSJed Brown 548c4762a1bSJed Brown /* Test C = A*B */ 5499566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(fullMatMatMultStage)); 550fb842aefSJose E. Roman PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &C)); 551c4762a1bSJed Brown 552c4762a1bSJed Brown /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ 553fb842aefSJose E. Roman PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP)); 5549566063dSJacob Faibussowitsch PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy)); 555fb842aefSJose E. Roman PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP_squared)); 556c4762a1bSJed Brown 5579566063dSJacob Faibussowitsch PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD)); 5589566063dSJacob Faibussowitsch PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD)); 559c4762a1bSJed Brown 5609566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP_squared)); 5619566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP_copy)); 5629566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP)); 5639566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C)); 5649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 5659566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 5669566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 567b122ec5aSJacob Faibussowitsch return 0; 568c4762a1bSJed Brown } 569c4762a1bSJed Brown 570c4762a1bSJed Brown /*TEST 571c4762a1bSJed Brown 572c4762a1bSJed Brown test: 573c4762a1bSJed Brown suffix: 1 574c4762a1bSJed Brown nsize: 1 575c20d7725SJed Brown args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted 576c4762a1bSJed Brown 577c4762a1bSJed Brown test: 578c4762a1bSJed Brown suffix: 2 579c4762a1bSJed Brown nsize: 1 580c4762a1bSJed Brown args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge 581c4762a1bSJed Brown 582c4762a1bSJed Brown test: 583c4762a1bSJed Brown suffix: 3 584c4762a1bSJed Brown nsize: 4 585c4762a1bSJed Brown args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi 586c4762a1bSJed Brown 587c4762a1bSJed Brown TEST*/ 588