xref: /petsc/src/mat/tests/ex226.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
1c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
2c4762a1bSJed Brown 
3c4762a1bSJed Brown #include <petscmat.h>
4c4762a1bSJed Brown 
5c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
6*d71ae5a4SJacob Faibussowitsch int global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n)
7*d71ae5a4SJacob Faibussowitsch {
89371c9d4SSatish Balay   return i + j * m + k * m * n;
99371c9d4SSatish Balay }
10c4762a1bSJed Brown 
11*d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv)
12*d71ae5a4SJacob Faibussowitsch {
13c4762a1bSJed Brown   Mat         A, B, C, PtAP, PtAP_copy, PtAP_squared;
14c4762a1bSJed Brown   PetscInt    i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1;
15c4762a1bSJed Brown   PetscScalar v;
16c4762a1bSJed Brown   PetscBool   equal = PETSC_FALSE, mat_view = PETSC_FALSE;
17c4762a1bSJed Brown   char        stencil[PETSC_MAX_PATH_LEN];
18c4762a1bSJed Brown #if defined(PETSC_USE_LOG)
19c4762a1bSJed Brown   PetscLogStage fullMatMatMultStage;
20c4762a1bSJed Brown #endif
21c4762a1bSJed Brown 
22327415f7SBarry Smith   PetscFunctionBeginUser;
239566063dSJacob Faibussowitsch   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
249566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
259566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
269566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL));
279566063dSJacob Faibussowitsch   PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view));
289566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL));
29c4762a1bSJed Brown 
30c4762a1bSJed Brown   /* Create a aij matrix A */
31c4762a1bSJed Brown   M = N = m * n * o;
329566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
339566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
349566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATAIJ));
359566063dSJacob Faibussowitsch   PetscCall(MatSetFromOptions(A));
36c4762a1bSJed Brown 
37c4762a1bSJed Brown   /* Consistency checks */
38bc30f867SBarry Smith   PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!");
39c4762a1bSJed Brown 
40c4762a1bSJed Brown   /************ 2D stencils ***************/
419566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(stencil, "2d5point", &equal));
42c4762a1bSJed Brown   if (equal) { /* 5-point stencil, 2D */
439566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
449566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
459566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
46c4762a1bSJed Brown     for (Ii = Istart; Ii < Iend; Ii++) {
479371c9d4SSatish Balay       v = -1.0;
489371c9d4SSatish Balay       k = Ii / (m * n);
499371c9d4SSatish Balay       j = (Ii - k * m * n) / m;
509371c9d4SSatish Balay       i = (Ii - k * m * n - j * m);
519371c9d4SSatish Balay       if (i > 0) {
529371c9d4SSatish Balay         J = global_index(i - 1, j, k, m, n);
539371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
549371c9d4SSatish Balay       }
559371c9d4SSatish Balay       if (i < m - 1) {
569371c9d4SSatish Balay         J = global_index(i + 1, j, k, m, n);
579371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
589371c9d4SSatish Balay       }
599371c9d4SSatish Balay       if (j > 0) {
609371c9d4SSatish Balay         J = global_index(i, j - 1, k, m, n);
619371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
629371c9d4SSatish Balay       }
639371c9d4SSatish Balay       if (j < n - 1) {
649371c9d4SSatish Balay         J = global_index(i, j + 1, k, m, n);
659371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
669371c9d4SSatish Balay       }
679371c9d4SSatish Balay       v = 4.0;
689371c9d4SSatish Balay       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
69c4762a1bSJed Brown     }
70c4762a1bSJed Brown   }
719566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(stencil, "2d9point", &equal));
72c4762a1bSJed Brown   if (equal) { /* 9-point stencil, 2D */
739566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
749566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
759566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
76c4762a1bSJed Brown     for (Ii = Istart; Ii < Iend; Ii++) {
779371c9d4SSatish Balay       v = -1.0;
789371c9d4SSatish Balay       k = Ii / (m * n);
799371c9d4SSatish Balay       j = (Ii - k * m * n) / m;
809371c9d4SSatish Balay       i = (Ii - k * m * n - j * m);
819371c9d4SSatish Balay       if (i > 0) {
829371c9d4SSatish Balay         J = global_index(i - 1, j, k, m, n);
839371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
849371c9d4SSatish Balay       }
859371c9d4SSatish Balay       if (i > 0 && j > 0) {
869371c9d4SSatish Balay         J = global_index(i - 1, j - 1, k, m, n);
879371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
889371c9d4SSatish Balay       }
899371c9d4SSatish Balay       if (j > 0) {
909371c9d4SSatish Balay         J = global_index(i, j - 1, k, m, n);
919371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
929371c9d4SSatish Balay       }
939371c9d4SSatish Balay       if (i < m - 1 && j > 0) {
949371c9d4SSatish Balay         J = global_index(i + 1, j - 1, k, m, n);
959371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
969371c9d4SSatish Balay       }
979371c9d4SSatish Balay       if (i < m - 1) {
989371c9d4SSatish Balay         J = global_index(i + 1, j, k, m, n);
999371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1009371c9d4SSatish Balay       }
1019371c9d4SSatish Balay       if (i < m - 1 && j < n - 1) {
1029371c9d4SSatish Balay         J = global_index(i + 1, j + 1, k, m, n);
1039371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1049371c9d4SSatish Balay       }
1059371c9d4SSatish Balay       if (j < n - 1) {
1069371c9d4SSatish Balay         J = global_index(i, j + 1, k, m, n);
1079371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1089371c9d4SSatish Balay       }
1099371c9d4SSatish Balay       if (i > 0 && j < n - 1) {
1109371c9d4SSatish Balay         J = global_index(i - 1, j + 1, k, m, n);
1119371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1129371c9d4SSatish Balay       }
1139371c9d4SSatish Balay       v = 8.0;
1149371c9d4SSatish Balay       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
115c4762a1bSJed Brown     }
116c4762a1bSJed Brown   }
1179566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(stencil, "2d9point2", &equal));
118c4762a1bSJed Brown   if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
1199566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
1209566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
1219566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
122c4762a1bSJed Brown     for (Ii = Istart; Ii < Iend; Ii++) {
1239371c9d4SSatish Balay       v = -1.0;
1249371c9d4SSatish Balay       k = Ii / (m * n);
1259371c9d4SSatish Balay       j = (Ii - k * m * n) / m;
1269371c9d4SSatish Balay       i = (Ii - k * m * n - j * m);
1279371c9d4SSatish Balay       if (i > 0) {
1289371c9d4SSatish Balay         J = global_index(i - 1, j, k, m, n);
1299371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1309371c9d4SSatish Balay       }
1319371c9d4SSatish Balay       if (i > 1) {
1329371c9d4SSatish Balay         J = global_index(i - 2, j, k, m, n);
1339371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1349371c9d4SSatish Balay       }
1359371c9d4SSatish Balay       if (i < m - 1) {
1369371c9d4SSatish Balay         J = global_index(i + 1, j, k, m, n);
1379371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1389371c9d4SSatish Balay       }
1399371c9d4SSatish Balay       if (i < m - 2) {
1409371c9d4SSatish Balay         J = global_index(i + 2, j, k, m, n);
1419371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1429371c9d4SSatish Balay       }
1439371c9d4SSatish Balay       if (j > 0) {
1449371c9d4SSatish Balay         J = global_index(i, j - 1, k, m, n);
1459371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1469371c9d4SSatish Balay       }
1479371c9d4SSatish Balay       if (j > 1) {
1489371c9d4SSatish Balay         J = global_index(i, j - 2, k, m, n);
1499371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1509371c9d4SSatish Balay       }
1519371c9d4SSatish Balay       if (j < n - 1) {
1529371c9d4SSatish Balay         J = global_index(i, j + 1, k, m, n);
1539371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1549371c9d4SSatish Balay       }
1559371c9d4SSatish Balay       if (j < n - 2) {
1569371c9d4SSatish Balay         J = global_index(i, j + 2, k, m, n);
1579371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1589371c9d4SSatish Balay       }
1599371c9d4SSatish Balay       v = 8.0;
1609371c9d4SSatish Balay       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
161c4762a1bSJed Brown     }
162c4762a1bSJed Brown   }
1639566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(stencil, "2d13point", &equal));
164c4762a1bSJed Brown   if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
1659566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
1669566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
1679566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
168c4762a1bSJed Brown     for (Ii = Istart; Ii < Iend; Ii++) {
1699371c9d4SSatish Balay       v = -1.0;
1709371c9d4SSatish Balay       k = Ii / (m * n);
1719371c9d4SSatish Balay       j = (Ii - k * m * n) / m;
1729371c9d4SSatish Balay       i = (Ii - k * m * n - j * m);
1739371c9d4SSatish Balay       if (i > 0) {
1749371c9d4SSatish Balay         J = global_index(i - 1, j, k, m, n);
1759371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1769371c9d4SSatish Balay       }
1779371c9d4SSatish Balay       if (i > 1) {
1789371c9d4SSatish Balay         J = global_index(i - 2, j, k, m, n);
1799371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1809371c9d4SSatish Balay       }
1819371c9d4SSatish Balay       if (i > 2) {
1829371c9d4SSatish Balay         J = global_index(i - 3, j, k, m, n);
1839371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1849371c9d4SSatish Balay       }
1859371c9d4SSatish Balay       if (i < m - 1) {
1869371c9d4SSatish Balay         J = global_index(i + 1, j, k, m, n);
1879371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1889371c9d4SSatish Balay       }
1899371c9d4SSatish Balay       if (i < m - 2) {
1909371c9d4SSatish Balay         J = global_index(i + 2, j, k, m, n);
1919371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1929371c9d4SSatish Balay       }
1939371c9d4SSatish Balay       if (i < m - 3) {
1949371c9d4SSatish Balay         J = global_index(i + 3, j, k, m, n);
1959371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1969371c9d4SSatish Balay       }
1979371c9d4SSatish Balay       if (j > 0) {
1989371c9d4SSatish Balay         J = global_index(i, j - 1, k, m, n);
1999371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2009371c9d4SSatish Balay       }
2019371c9d4SSatish Balay       if (j > 1) {
2029371c9d4SSatish Balay         J = global_index(i, j - 2, k, m, n);
2039371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2049371c9d4SSatish Balay       }
2059371c9d4SSatish Balay       if (j > 2) {
2069371c9d4SSatish Balay         J = global_index(i, j - 3, k, m, n);
2079371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2089371c9d4SSatish Balay       }
2099371c9d4SSatish Balay       if (j < n - 1) {
2109371c9d4SSatish Balay         J = global_index(i, j + 1, k, m, n);
2119371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2129371c9d4SSatish Balay       }
2139371c9d4SSatish Balay       if (j < n - 2) {
2149371c9d4SSatish Balay         J = global_index(i, j + 2, k, m, n);
2159371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2169371c9d4SSatish Balay       }
2179371c9d4SSatish Balay       if (j < n - 3) {
2189371c9d4SSatish Balay         J = global_index(i, j + 3, k, m, n);
2199371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2209371c9d4SSatish Balay       }
2219371c9d4SSatish Balay       v = 12.0;
2229371c9d4SSatish Balay       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
223c4762a1bSJed Brown     }
224c4762a1bSJed Brown   }
225c4762a1bSJed Brown   /************ 3D stencils ***************/
2269566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(stencil, "3d7point", &equal));
227c4762a1bSJed Brown   if (equal) { /* 7-point stencil, 3D */
2289566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL));
2299566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL));
2309566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
231c4762a1bSJed Brown     for (Ii = Istart; Ii < Iend; Ii++) {
2329371c9d4SSatish Balay       v = -1.0;
2339371c9d4SSatish Balay       k = Ii / (m * n);
2349371c9d4SSatish Balay       j = (Ii - k * m * n) / m;
2359371c9d4SSatish Balay       i = (Ii - k * m * n - j * m);
2369371c9d4SSatish Balay       if (i > 0) {
2379371c9d4SSatish Balay         J = global_index(i - 1, j, k, m, n);
2389371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2399371c9d4SSatish Balay       }
2409371c9d4SSatish Balay       if (i < m - 1) {
2419371c9d4SSatish Balay         J = global_index(i + 1, j, k, m, n);
2429371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2439371c9d4SSatish Balay       }
2449371c9d4SSatish Balay       if (j > 0) {
2459371c9d4SSatish Balay         J = global_index(i, j - 1, k, m, n);
2469371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2479371c9d4SSatish Balay       }
2489371c9d4SSatish Balay       if (j < n - 1) {
2499371c9d4SSatish Balay         J = global_index(i, j + 1, k, m, n);
2509371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2519371c9d4SSatish Balay       }
2529371c9d4SSatish Balay       if (k > 0) {
2539371c9d4SSatish Balay         J = global_index(i, j, k - 1, m, n);
2549371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2559371c9d4SSatish Balay       }
2569371c9d4SSatish Balay       if (k < o - 1) {
2579371c9d4SSatish Balay         J = global_index(i, j, k + 1, m, n);
2589371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2599371c9d4SSatish Balay       }
2609371c9d4SSatish Balay       v = 6.0;
2619371c9d4SSatish Balay       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
262c4762a1bSJed Brown     }
263c4762a1bSJed Brown   }
2649566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(stencil, "3d13point", &equal));
265c4762a1bSJed Brown   if (equal) { /* 13-point stencil, 3D */
2669566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
2679566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
2689566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
269c4762a1bSJed Brown     for (Ii = Istart; Ii < Iend; Ii++) {
2709371c9d4SSatish Balay       v = -1.0;
2719371c9d4SSatish Balay       k = Ii / (m * n);
2729371c9d4SSatish Balay       j = (Ii - k * m * n) / m;
2739371c9d4SSatish Balay       i = (Ii - k * m * n - j * m);
2749371c9d4SSatish Balay       if (i > 0) {
2759371c9d4SSatish Balay         J = global_index(i - 1, j, k, m, n);
2769371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2779371c9d4SSatish Balay       }
2789371c9d4SSatish Balay       if (i > 1) {
2799371c9d4SSatish Balay         J = global_index(i - 2, j, k, m, n);
2809371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2819371c9d4SSatish Balay       }
2829371c9d4SSatish Balay       if (i < m - 1) {
2839371c9d4SSatish Balay         J = global_index(i + 1, j, k, m, n);
2849371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2859371c9d4SSatish Balay       }
2869371c9d4SSatish Balay       if (i < m - 2) {
2879371c9d4SSatish Balay         J = global_index(i + 2, j, k, m, n);
2889371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2899371c9d4SSatish Balay       }
2909371c9d4SSatish Balay       if (j > 0) {
2919371c9d4SSatish Balay         J = global_index(i, j - 1, k, m, n);
2929371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2939371c9d4SSatish Balay       }
2949371c9d4SSatish Balay       if (j > 1) {
2959371c9d4SSatish Balay         J = global_index(i, j - 2, k, m, n);
2969371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2979371c9d4SSatish Balay       }
2989371c9d4SSatish Balay       if (j < n - 1) {
2999371c9d4SSatish Balay         J = global_index(i, j + 1, k, m, n);
3009371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3019371c9d4SSatish Balay       }
3029371c9d4SSatish Balay       if (j < n - 2) {
3039371c9d4SSatish Balay         J = global_index(i, j + 2, k, m, n);
3049371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3059371c9d4SSatish Balay       }
3069371c9d4SSatish Balay       if (k > 0) {
3079371c9d4SSatish Balay         J = global_index(i, j, k - 1, m, n);
3089371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3099371c9d4SSatish Balay       }
3109371c9d4SSatish Balay       if (k > 1) {
3119371c9d4SSatish Balay         J = global_index(i, j, k - 2, m, n);
3129371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3139371c9d4SSatish Balay       }
3149371c9d4SSatish Balay       if (k < o - 1) {
3159371c9d4SSatish Balay         J = global_index(i, j, k + 1, m, n);
3169371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3179371c9d4SSatish Balay       }
3189371c9d4SSatish Balay       if (k < o - 2) {
3199371c9d4SSatish Balay         J = global_index(i, j, k + 2, m, n);
3209371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3219371c9d4SSatish Balay       }
3229371c9d4SSatish Balay       v = 12.0;
3239371c9d4SSatish Balay       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
324c4762a1bSJed Brown     }
325c4762a1bSJed Brown   }
3269566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(stencil, "3d19point", &equal));
327c4762a1bSJed Brown   if (equal) { /* 19-point stencil, 3D */
3289566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL));
3299566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL));
3309566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
331c4762a1bSJed Brown     for (Ii = Istart; Ii < Iend; Ii++) {
3329371c9d4SSatish Balay       v = -1.0;
3339371c9d4SSatish Balay       k = Ii / (m * n);
3349371c9d4SSatish Balay       j = (Ii - k * m * n) / m;
3359371c9d4SSatish Balay       i = (Ii - k * m * n - j * m);
336c4762a1bSJed Brown       /* one hop */
3379371c9d4SSatish Balay       if (i > 0) {
3389371c9d4SSatish Balay         J = global_index(i - 1, j, k, m, n);
3399371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3409371c9d4SSatish Balay       }
3419371c9d4SSatish Balay       if (i < m - 1) {
3429371c9d4SSatish Balay         J = global_index(i + 1, j, k, m, n);
3439371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3449371c9d4SSatish Balay       }
3459371c9d4SSatish Balay       if (j > 0) {
3469371c9d4SSatish Balay         J = global_index(i, j - 1, k, m, n);
3479371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3489371c9d4SSatish Balay       }
3499371c9d4SSatish Balay       if (j < n - 1) {
3509371c9d4SSatish Balay         J = global_index(i, j + 1, k, m, n);
3519371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3529371c9d4SSatish Balay       }
3539371c9d4SSatish Balay       if (k > 0) {
3549371c9d4SSatish Balay         J = global_index(i, j, k - 1, m, n);
3559371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3569371c9d4SSatish Balay       }
3579371c9d4SSatish Balay       if (k < o - 1) {
3589371c9d4SSatish Balay         J = global_index(i, j, k + 1, m, n);
3599371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3609371c9d4SSatish Balay       }
361c4762a1bSJed Brown       /* two hops */
3629371c9d4SSatish Balay       if (i > 0 && j > 0) {
3639371c9d4SSatish Balay         J = global_index(i - 1, j - 1, k, m, n);
3649371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3659371c9d4SSatish Balay       }
3669371c9d4SSatish Balay       if (i > 0 && k > 0) {
3679371c9d4SSatish Balay         J = global_index(i - 1, j, k - 1, m, n);
3689371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3699371c9d4SSatish Balay       }
3709371c9d4SSatish Balay       if (i > 0 && j < n - 1) {
3719371c9d4SSatish Balay         J = global_index(i - 1, j + 1, k, m, n);
3729371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3739371c9d4SSatish Balay       }
3749371c9d4SSatish Balay       if (i > 0 && k < o - 1) {
3759371c9d4SSatish Balay         J = global_index(i - 1, j, k + 1, m, n);
3769371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3779371c9d4SSatish Balay       }
3789371c9d4SSatish Balay       if (i < m - 1 && j > 0) {
3799371c9d4SSatish Balay         J = global_index(i + 1, j - 1, k, m, n);
3809371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3819371c9d4SSatish Balay       }
3829371c9d4SSatish Balay       if (i < m - 1 && k > 0) {
3839371c9d4SSatish Balay         J = global_index(i + 1, j, k - 1, m, n);
3849371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3859371c9d4SSatish Balay       }
3869371c9d4SSatish Balay       if (i < m - 1 && j < n - 1) {
3879371c9d4SSatish Balay         J = global_index(i + 1, j + 1, k, m, n);
3889371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3899371c9d4SSatish Balay       }
3909371c9d4SSatish Balay       if (i < m - 1 && k < o - 1) {
3919371c9d4SSatish Balay         J = global_index(i + 1, j, k + 1, m, n);
3929371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3939371c9d4SSatish Balay       }
3949371c9d4SSatish Balay       if (j > 0 && k > 0) {
3959371c9d4SSatish Balay         J = global_index(i, j - 1, k - 1, m, n);
3969371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3979371c9d4SSatish Balay       }
3989371c9d4SSatish Balay       if (j > 0 && k < o - 1) {
3999371c9d4SSatish Balay         J = global_index(i, j - 1, k + 1, m, n);
4009371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4019371c9d4SSatish Balay       }
4029371c9d4SSatish Balay       if (j < n - 1 && k > 0) {
4039371c9d4SSatish Balay         J = global_index(i, j + 1, k - 1, m, n);
4049371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4059371c9d4SSatish Balay       }
4069371c9d4SSatish Balay       if (j < n - 1 && k < o - 1) {
4079371c9d4SSatish Balay         J = global_index(i, j + 1, k + 1, m, n);
4089371c9d4SSatish Balay         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4099371c9d4SSatish Balay       }
4109371c9d4SSatish Balay       v = 18.0;
4119371c9d4SSatish Balay       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
412c4762a1bSJed Brown     }
413c4762a1bSJed Brown   }
4149566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(stencil, "3d27point", &equal));
415c4762a1bSJed Brown   if (equal) { /* 27-point stencil, 3D */
4169566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL));
4179566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL));
4189566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
419c4762a1bSJed Brown     for (Ii = Istart; Ii < Iend; Ii++) {
4209371c9d4SSatish Balay       v = -1.0;
4219371c9d4SSatish Balay       k = Ii / (m * n);
4229371c9d4SSatish Balay       j = (Ii - k * m * n) / m;
4239371c9d4SSatish Balay       i = (Ii - k * m * n - j * m);
424c4762a1bSJed Brown       if (k > 0) {
425c4762a1bSJed Brown         if (j > 0) {
4269371c9d4SSatish Balay           if (i > 0) {
4279371c9d4SSatish Balay             J = global_index(i - 1, j - 1, k - 1, m, n);
4289371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4299371c9d4SSatish Balay           }
4309371c9d4SSatish Balay           J = global_index(i, j - 1, k - 1, m, n);
4319371c9d4SSatish Balay           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4329371c9d4SSatish Balay           if (i < m - 1) {
4339371c9d4SSatish Balay             J = global_index(i + 1, j - 1, k - 1, m, n);
4349371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4359371c9d4SSatish Balay           }
436c4762a1bSJed Brown         }
437c4762a1bSJed Brown         {
4389371c9d4SSatish Balay           if (i > 0) {
4399371c9d4SSatish Balay             J = global_index(i - 1, j, k - 1, m, n);
4409371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4419371c9d4SSatish Balay           }
4429371c9d4SSatish Balay           J = global_index(i, j, k - 1, m, n);
4439371c9d4SSatish Balay           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4449371c9d4SSatish Balay           if (i < m - 1) {
4459371c9d4SSatish Balay             J = global_index(i + 1, j, k - 1, m, n);
4469371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4479371c9d4SSatish Balay           }
448c4762a1bSJed Brown         }
449c4762a1bSJed Brown         if (j < n - 1) {
4509371c9d4SSatish Balay           if (i > 0) {
4519371c9d4SSatish Balay             J = global_index(i - 1, j + 1, k - 1, m, n);
4529371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4539371c9d4SSatish Balay           }
4549371c9d4SSatish Balay           J = global_index(i, j + 1, k - 1, m, n);
4559371c9d4SSatish Balay           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4569371c9d4SSatish Balay           if (i < m - 1) {
4579371c9d4SSatish Balay             J = global_index(i + 1, j + 1, k - 1, m, n);
4589371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4599371c9d4SSatish Balay           }
460c4762a1bSJed Brown         }
461c4762a1bSJed Brown       }
462c4762a1bSJed Brown       {
463c4762a1bSJed Brown         if (j > 0) {
4649371c9d4SSatish Balay           if (i > 0) {
4659371c9d4SSatish Balay             J = global_index(i - 1, j - 1, k, m, n);
4669371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4679371c9d4SSatish Balay           }
4689371c9d4SSatish Balay           J = global_index(i, j - 1, k, m, n);
4699371c9d4SSatish Balay           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4709371c9d4SSatish Balay           if (i < m - 1) {
4719371c9d4SSatish Balay             J = global_index(i + 1, j - 1, k, m, n);
4729371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4739371c9d4SSatish Balay           }
474c4762a1bSJed Brown         }
475c4762a1bSJed Brown         {
4769371c9d4SSatish Balay           if (i > 0) {
4779371c9d4SSatish Balay             J = global_index(i - 1, j, k, m, n);
4789371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4799371c9d4SSatish Balay           }
4809371c9d4SSatish Balay           J = global_index(i, j, k, m, n);
4819371c9d4SSatish Balay           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4829371c9d4SSatish Balay           if (i < m - 1) {
4839371c9d4SSatish Balay             J = global_index(i + 1, j, k, m, n);
4849371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4859371c9d4SSatish Balay           }
486c4762a1bSJed Brown         }
487c4762a1bSJed Brown         if (j < n - 1) {
4889371c9d4SSatish Balay           if (i > 0) {
4899371c9d4SSatish Balay             J = global_index(i - 1, j + 1, k, m, n);
4909371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4919371c9d4SSatish Balay           }
4929371c9d4SSatish Balay           J = global_index(i, j + 1, k, m, n);
4939371c9d4SSatish Balay           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4949371c9d4SSatish Balay           if (i < m - 1) {
4959371c9d4SSatish Balay             J = global_index(i + 1, j + 1, k, m, n);
4969371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4979371c9d4SSatish Balay           }
498c4762a1bSJed Brown         }
499c4762a1bSJed Brown       }
500c4762a1bSJed Brown       if (k < o - 1) {
501c4762a1bSJed Brown         if (j > 0) {
5029371c9d4SSatish Balay           if (i > 0) {
5039371c9d4SSatish Balay             J = global_index(i - 1, j - 1, k + 1, m, n);
5049371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5059371c9d4SSatish Balay           }
5069371c9d4SSatish Balay           J = global_index(i, j - 1, k + 1, m, n);
5079371c9d4SSatish Balay           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5089371c9d4SSatish Balay           if (i < m - 1) {
5099371c9d4SSatish Balay             J = global_index(i + 1, j - 1, k + 1, m, n);
5109371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5119371c9d4SSatish Balay           }
512c4762a1bSJed Brown         }
513c4762a1bSJed Brown         {
5149371c9d4SSatish Balay           if (i > 0) {
5159371c9d4SSatish Balay             J = global_index(i - 1, j, k + 1, m, n);
5169371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5179371c9d4SSatish Balay           }
5189371c9d4SSatish Balay           J = global_index(i, j, k + 1, m, n);
5199371c9d4SSatish Balay           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5209371c9d4SSatish Balay           if (i < m - 1) {
5219371c9d4SSatish Balay             J = global_index(i + 1, j, k + 1, m, n);
5229371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5239371c9d4SSatish Balay           }
524c4762a1bSJed Brown         }
525c4762a1bSJed Brown         if (j < n - 1) {
5269371c9d4SSatish Balay           if (i > 0) {
5279371c9d4SSatish Balay             J = global_index(i - 1, j + 1, k + 1, m, n);
5289371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5299371c9d4SSatish Balay           }
5309371c9d4SSatish Balay           J = global_index(i, j + 1, k + 1, m, n);
5319371c9d4SSatish Balay           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5329371c9d4SSatish Balay           if (i < m - 1) {
5339371c9d4SSatish Balay             J = global_index(i + 1, j + 1, k + 1, m, n);
5349371c9d4SSatish Balay             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
535c4762a1bSJed Brown           }
536c4762a1bSJed Brown         }
5379371c9d4SSatish Balay       }
5389371c9d4SSatish Balay       v = 26.0;
5399371c9d4SSatish Balay       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
540c4762a1bSJed Brown     }
541c4762a1bSJed Brown   }
5429566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
5439566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
544c4762a1bSJed Brown 
545c4762a1bSJed Brown   /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
5469566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
547c4762a1bSJed Brown 
5489566063dSJacob Faibussowitsch   PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage));
549c4762a1bSJed Brown 
550c4762a1bSJed Brown   /* Test C = A*B */
5519566063dSJacob Faibussowitsch   PetscCall(PetscLogStagePush(fullMatMatMultStage));
5529566063dSJacob Faibussowitsch   PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));
553c4762a1bSJed Brown 
554c4762a1bSJed Brown   /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C)  */
5559566063dSJacob Faibussowitsch   PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP));
5569566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy));
5579566063dSJacob Faibussowitsch   PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP_squared));
558c4762a1bSJed Brown 
5599566063dSJacob Faibussowitsch   PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
5609566063dSJacob Faibussowitsch   PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD));
561c4762a1bSJed Brown 
5629566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&PtAP_squared));
5639566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&PtAP_copy));
5649566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&PtAP));
5659566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&C));
5669566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
5679566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
5689566063dSJacob Faibussowitsch   PetscCall(PetscFinalize());
569b122ec5aSJacob Faibussowitsch   return 0;
570c4762a1bSJed Brown }
571c4762a1bSJed Brown 
572c4762a1bSJed Brown /*TEST
573c4762a1bSJed Brown 
574c4762a1bSJed Brown  test:
575c4762a1bSJed Brown       suffix: 1
576c4762a1bSJed Brown       nsize: 1
577c20d7725SJed Brown       args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted
578c4762a1bSJed Brown 
579c4762a1bSJed Brown  test:
580c4762a1bSJed Brown        suffix: 2
581c4762a1bSJed Brown        nsize: 1
582c4762a1bSJed Brown        args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
583c4762a1bSJed Brown 
584c4762a1bSJed Brown  test:
585c4762a1bSJed Brown       suffix: 3
586c4762a1bSJed Brown       nsize: 4
587c4762a1bSJed Brown       args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
588c4762a1bSJed Brown 
589c4762a1bSJed Brown TEST*/
590