1c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscmat.h> 4c4762a1bSJed Brown 5c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */ 6*9371c9d4SSatish Balay int global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n) { 7*9371c9d4SSatish Balay return i + j * m + k * m * n; 8*9371c9d4SSatish Balay } 9c4762a1bSJed Brown 10*9371c9d4SSatish Balay int main(int argc, char **argv) { 11c4762a1bSJed Brown Mat A, B, C, PtAP, PtAP_copy, PtAP_squared; 12c4762a1bSJed Brown PetscInt i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1; 13c4762a1bSJed Brown PetscScalar v; 14c4762a1bSJed Brown PetscBool equal = PETSC_FALSE, mat_view = PETSC_FALSE; 15c4762a1bSJed Brown char stencil[PETSC_MAX_PATH_LEN]; 16c4762a1bSJed Brown #if defined(PETSC_USE_LOG) 17c4762a1bSJed Brown PetscLogStage fullMatMatMultStage; 18c4762a1bSJed Brown #endif 19c4762a1bSJed Brown 20327415f7SBarry Smith PetscFunctionBeginUser; 219566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 229566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL)); 239566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); 249566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL)); 259566063dSJacob Faibussowitsch PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view)); 269566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL)); 27c4762a1bSJed Brown 28c4762a1bSJed Brown /* Create a aij matrix A */ 29c4762a1bSJed Brown M = N = m * n * o; 309566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 319566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N)); 329566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATAIJ)); 339566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 34c4762a1bSJed Brown 35c4762a1bSJed Brown /* Consistency checks */ 36bc30f867SBarry Smith PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!"); 37c4762a1bSJed Brown 38c4762a1bSJed Brown /************ 2D stencils ***************/ 399566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d5point", &equal)); 40c4762a1bSJed Brown if (equal) { /* 5-point stencil, 2D */ 419566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL)); 429566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL)); 439566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 44c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 45*9371c9d4SSatish Balay v = -1.0; 46*9371c9d4SSatish Balay k = Ii / (m * n); 47*9371c9d4SSatish Balay j = (Ii - k * m * n) / m; 48*9371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 49*9371c9d4SSatish Balay if (i > 0) { 50*9371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 51*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 52*9371c9d4SSatish Balay } 53*9371c9d4SSatish Balay if (i < m - 1) { 54*9371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 55*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 56*9371c9d4SSatish Balay } 57*9371c9d4SSatish Balay if (j > 0) { 58*9371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 59*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 60*9371c9d4SSatish Balay } 61*9371c9d4SSatish Balay if (j < n - 1) { 62*9371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 63*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 64*9371c9d4SSatish Balay } 65*9371c9d4SSatish Balay v = 4.0; 66*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 67c4762a1bSJed Brown } 68c4762a1bSJed Brown } 699566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d9point", &equal)); 70c4762a1bSJed Brown if (equal) { /* 9-point stencil, 2D */ 719566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL)); 729566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL)); 739566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 74c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 75*9371c9d4SSatish Balay v = -1.0; 76*9371c9d4SSatish Balay k = Ii / (m * n); 77*9371c9d4SSatish Balay j = (Ii - k * m * n) / m; 78*9371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 79*9371c9d4SSatish Balay if (i > 0) { 80*9371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 81*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 82*9371c9d4SSatish Balay } 83*9371c9d4SSatish Balay if (i > 0 && j > 0) { 84*9371c9d4SSatish Balay J = global_index(i - 1, j - 1, k, m, n); 85*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 86*9371c9d4SSatish Balay } 87*9371c9d4SSatish Balay if (j > 0) { 88*9371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 89*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 90*9371c9d4SSatish Balay } 91*9371c9d4SSatish Balay if (i < m - 1 && j > 0) { 92*9371c9d4SSatish Balay J = global_index(i + 1, j - 1, k, m, n); 93*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 94*9371c9d4SSatish Balay } 95*9371c9d4SSatish Balay if (i < m - 1) { 96*9371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 97*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 98*9371c9d4SSatish Balay } 99*9371c9d4SSatish Balay if (i < m - 1 && j < n - 1) { 100*9371c9d4SSatish Balay J = global_index(i + 1, j + 1, k, m, n); 101*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 102*9371c9d4SSatish Balay } 103*9371c9d4SSatish Balay if (j < n - 1) { 104*9371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 105*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 106*9371c9d4SSatish Balay } 107*9371c9d4SSatish Balay if (i > 0 && j < n - 1) { 108*9371c9d4SSatish Balay J = global_index(i - 1, j + 1, k, m, n); 109*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 110*9371c9d4SSatish Balay } 111*9371c9d4SSatish Balay v = 8.0; 112*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 113c4762a1bSJed Brown } 114c4762a1bSJed Brown } 1159566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d9point2", &equal)); 116c4762a1bSJed Brown if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ 1179566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL)); 1189566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL)); 1199566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 120c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 121*9371c9d4SSatish Balay v = -1.0; 122*9371c9d4SSatish Balay k = Ii / (m * n); 123*9371c9d4SSatish Balay j = (Ii - k * m * n) / m; 124*9371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 125*9371c9d4SSatish Balay if (i > 0) { 126*9371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 127*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 128*9371c9d4SSatish Balay } 129*9371c9d4SSatish Balay if (i > 1) { 130*9371c9d4SSatish Balay J = global_index(i - 2, j, k, m, n); 131*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 132*9371c9d4SSatish Balay } 133*9371c9d4SSatish Balay if (i < m - 1) { 134*9371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 135*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 136*9371c9d4SSatish Balay } 137*9371c9d4SSatish Balay if (i < m - 2) { 138*9371c9d4SSatish Balay J = global_index(i + 2, j, k, m, n); 139*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 140*9371c9d4SSatish Balay } 141*9371c9d4SSatish Balay if (j > 0) { 142*9371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 143*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 144*9371c9d4SSatish Balay } 145*9371c9d4SSatish Balay if (j > 1) { 146*9371c9d4SSatish Balay J = global_index(i, j - 2, k, m, n); 147*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 148*9371c9d4SSatish Balay } 149*9371c9d4SSatish Balay if (j < n - 1) { 150*9371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 151*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 152*9371c9d4SSatish Balay } 153*9371c9d4SSatish Balay if (j < n - 2) { 154*9371c9d4SSatish Balay J = global_index(i, j + 2, k, m, n); 155*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 156*9371c9d4SSatish Balay } 157*9371c9d4SSatish Balay v = 8.0; 158*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 159c4762a1bSJed Brown } 160c4762a1bSJed Brown } 1619566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d13point", &equal)); 162c4762a1bSJed Brown if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ 1639566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL)); 1649566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL)); 1659566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 166c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 167*9371c9d4SSatish Balay v = -1.0; 168*9371c9d4SSatish Balay k = Ii / (m * n); 169*9371c9d4SSatish Balay j = (Ii - k * m * n) / m; 170*9371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 171*9371c9d4SSatish Balay if (i > 0) { 172*9371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 173*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 174*9371c9d4SSatish Balay } 175*9371c9d4SSatish Balay if (i > 1) { 176*9371c9d4SSatish Balay J = global_index(i - 2, j, k, m, n); 177*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 178*9371c9d4SSatish Balay } 179*9371c9d4SSatish Balay if (i > 2) { 180*9371c9d4SSatish Balay J = global_index(i - 3, j, k, m, n); 181*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 182*9371c9d4SSatish Balay } 183*9371c9d4SSatish Balay if (i < m - 1) { 184*9371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 185*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 186*9371c9d4SSatish Balay } 187*9371c9d4SSatish Balay if (i < m - 2) { 188*9371c9d4SSatish Balay J = global_index(i + 2, j, k, m, n); 189*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 190*9371c9d4SSatish Balay } 191*9371c9d4SSatish Balay if (i < m - 3) { 192*9371c9d4SSatish Balay J = global_index(i + 3, j, k, m, n); 193*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 194*9371c9d4SSatish Balay } 195*9371c9d4SSatish Balay if (j > 0) { 196*9371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 197*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 198*9371c9d4SSatish Balay } 199*9371c9d4SSatish Balay if (j > 1) { 200*9371c9d4SSatish Balay J = global_index(i, j - 2, k, m, n); 201*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 202*9371c9d4SSatish Balay } 203*9371c9d4SSatish Balay if (j > 2) { 204*9371c9d4SSatish Balay J = global_index(i, j - 3, k, m, n); 205*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 206*9371c9d4SSatish Balay } 207*9371c9d4SSatish Balay if (j < n - 1) { 208*9371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 209*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 210*9371c9d4SSatish Balay } 211*9371c9d4SSatish Balay if (j < n - 2) { 212*9371c9d4SSatish Balay J = global_index(i, j + 2, k, m, n); 213*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 214*9371c9d4SSatish Balay } 215*9371c9d4SSatish Balay if (j < n - 3) { 216*9371c9d4SSatish Balay J = global_index(i, j + 3, k, m, n); 217*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 218*9371c9d4SSatish Balay } 219*9371c9d4SSatish Balay v = 12.0; 220*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 221c4762a1bSJed Brown } 222c4762a1bSJed Brown } 223c4762a1bSJed Brown /************ 3D stencils ***************/ 2249566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d7point", &equal)); 225c4762a1bSJed Brown if (equal) { /* 7-point stencil, 3D */ 2269566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL)); 2279566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL)); 2289566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 229c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 230*9371c9d4SSatish Balay v = -1.0; 231*9371c9d4SSatish Balay k = Ii / (m * n); 232*9371c9d4SSatish Balay j = (Ii - k * m * n) / m; 233*9371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 234*9371c9d4SSatish Balay if (i > 0) { 235*9371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 236*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 237*9371c9d4SSatish Balay } 238*9371c9d4SSatish Balay if (i < m - 1) { 239*9371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 240*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 241*9371c9d4SSatish Balay } 242*9371c9d4SSatish Balay if (j > 0) { 243*9371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 244*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 245*9371c9d4SSatish Balay } 246*9371c9d4SSatish Balay if (j < n - 1) { 247*9371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 248*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 249*9371c9d4SSatish Balay } 250*9371c9d4SSatish Balay if (k > 0) { 251*9371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n); 252*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 253*9371c9d4SSatish Balay } 254*9371c9d4SSatish Balay if (k < o - 1) { 255*9371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n); 256*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 257*9371c9d4SSatish Balay } 258*9371c9d4SSatish Balay v = 6.0; 259*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 260c4762a1bSJed Brown } 261c4762a1bSJed Brown } 2629566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d13point", &equal)); 263c4762a1bSJed Brown if (equal) { /* 13-point stencil, 3D */ 2649566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL)); 2659566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL)); 2669566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 267c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 268*9371c9d4SSatish Balay v = -1.0; 269*9371c9d4SSatish Balay k = Ii / (m * n); 270*9371c9d4SSatish Balay j = (Ii - k * m * n) / m; 271*9371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 272*9371c9d4SSatish Balay if (i > 0) { 273*9371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 274*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 275*9371c9d4SSatish Balay } 276*9371c9d4SSatish Balay if (i > 1) { 277*9371c9d4SSatish Balay J = global_index(i - 2, j, k, m, n); 278*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 279*9371c9d4SSatish Balay } 280*9371c9d4SSatish Balay if (i < m - 1) { 281*9371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 282*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 283*9371c9d4SSatish Balay } 284*9371c9d4SSatish Balay if (i < m - 2) { 285*9371c9d4SSatish Balay J = global_index(i + 2, j, k, m, n); 286*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 287*9371c9d4SSatish Balay } 288*9371c9d4SSatish Balay if (j > 0) { 289*9371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 290*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 291*9371c9d4SSatish Balay } 292*9371c9d4SSatish Balay if (j > 1) { 293*9371c9d4SSatish Balay J = global_index(i, j - 2, k, m, n); 294*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 295*9371c9d4SSatish Balay } 296*9371c9d4SSatish Balay if (j < n - 1) { 297*9371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 298*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 299*9371c9d4SSatish Balay } 300*9371c9d4SSatish Balay if (j < n - 2) { 301*9371c9d4SSatish Balay J = global_index(i, j + 2, k, m, n); 302*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 303*9371c9d4SSatish Balay } 304*9371c9d4SSatish Balay if (k > 0) { 305*9371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n); 306*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 307*9371c9d4SSatish Balay } 308*9371c9d4SSatish Balay if (k > 1) { 309*9371c9d4SSatish Balay J = global_index(i, j, k - 2, m, n); 310*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 311*9371c9d4SSatish Balay } 312*9371c9d4SSatish Balay if (k < o - 1) { 313*9371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n); 314*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 315*9371c9d4SSatish Balay } 316*9371c9d4SSatish Balay if (k < o - 2) { 317*9371c9d4SSatish Balay J = global_index(i, j, k + 2, m, n); 318*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 319*9371c9d4SSatish Balay } 320*9371c9d4SSatish Balay v = 12.0; 321*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 322c4762a1bSJed Brown } 323c4762a1bSJed Brown } 3249566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d19point", &equal)); 325c4762a1bSJed Brown if (equal) { /* 19-point stencil, 3D */ 3269566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL)); 3279566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL)); 3289566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 329c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 330*9371c9d4SSatish Balay v = -1.0; 331*9371c9d4SSatish Balay k = Ii / (m * n); 332*9371c9d4SSatish Balay j = (Ii - k * m * n) / m; 333*9371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 334c4762a1bSJed Brown /* one hop */ 335*9371c9d4SSatish Balay if (i > 0) { 336*9371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 337*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 338*9371c9d4SSatish Balay } 339*9371c9d4SSatish Balay if (i < m - 1) { 340*9371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 341*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 342*9371c9d4SSatish Balay } 343*9371c9d4SSatish Balay if (j > 0) { 344*9371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 345*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 346*9371c9d4SSatish Balay } 347*9371c9d4SSatish Balay if (j < n - 1) { 348*9371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 349*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 350*9371c9d4SSatish Balay } 351*9371c9d4SSatish Balay if (k > 0) { 352*9371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n); 353*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 354*9371c9d4SSatish Balay } 355*9371c9d4SSatish Balay if (k < o - 1) { 356*9371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n); 357*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 358*9371c9d4SSatish Balay } 359c4762a1bSJed Brown /* two hops */ 360*9371c9d4SSatish Balay if (i > 0 && j > 0) { 361*9371c9d4SSatish Balay J = global_index(i - 1, j - 1, k, m, n); 362*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 363*9371c9d4SSatish Balay } 364*9371c9d4SSatish Balay if (i > 0 && k > 0) { 365*9371c9d4SSatish Balay J = global_index(i - 1, j, k - 1, m, n); 366*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 367*9371c9d4SSatish Balay } 368*9371c9d4SSatish Balay if (i > 0 && j < n - 1) { 369*9371c9d4SSatish Balay J = global_index(i - 1, j + 1, k, m, n); 370*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 371*9371c9d4SSatish Balay } 372*9371c9d4SSatish Balay if (i > 0 && k < o - 1) { 373*9371c9d4SSatish Balay J = global_index(i - 1, j, k + 1, m, n); 374*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 375*9371c9d4SSatish Balay } 376*9371c9d4SSatish Balay if (i < m - 1 && j > 0) { 377*9371c9d4SSatish Balay J = global_index(i + 1, j - 1, k, m, n); 378*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 379*9371c9d4SSatish Balay } 380*9371c9d4SSatish Balay if (i < m - 1 && k > 0) { 381*9371c9d4SSatish Balay J = global_index(i + 1, j, k - 1, m, n); 382*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 383*9371c9d4SSatish Balay } 384*9371c9d4SSatish Balay if (i < m - 1 && j < n - 1) { 385*9371c9d4SSatish Balay J = global_index(i + 1, j + 1, k, m, n); 386*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 387*9371c9d4SSatish Balay } 388*9371c9d4SSatish Balay if (i < m - 1 && k < o - 1) { 389*9371c9d4SSatish Balay J = global_index(i + 1, j, k + 1, m, n); 390*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 391*9371c9d4SSatish Balay } 392*9371c9d4SSatish Balay if (j > 0 && k > 0) { 393*9371c9d4SSatish Balay J = global_index(i, j - 1, k - 1, m, n); 394*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 395*9371c9d4SSatish Balay } 396*9371c9d4SSatish Balay if (j > 0 && k < o - 1) { 397*9371c9d4SSatish Balay J = global_index(i, j - 1, k + 1, m, n); 398*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 399*9371c9d4SSatish Balay } 400*9371c9d4SSatish Balay if (j < n - 1 && k > 0) { 401*9371c9d4SSatish Balay J = global_index(i, j + 1, k - 1, m, n); 402*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 403*9371c9d4SSatish Balay } 404*9371c9d4SSatish Balay if (j < n - 1 && k < o - 1) { 405*9371c9d4SSatish Balay J = global_index(i, j + 1, k + 1, m, n); 406*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 407*9371c9d4SSatish Balay } 408*9371c9d4SSatish Balay v = 18.0; 409*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 410c4762a1bSJed Brown } 411c4762a1bSJed Brown } 4129566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d27point", &equal)); 413c4762a1bSJed Brown if (equal) { /* 27-point stencil, 3D */ 4149566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL)); 4159566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL)); 4169566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 417c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) { 418*9371c9d4SSatish Balay v = -1.0; 419*9371c9d4SSatish Balay k = Ii / (m * n); 420*9371c9d4SSatish Balay j = (Ii - k * m * n) / m; 421*9371c9d4SSatish Balay i = (Ii - k * m * n - j * m); 422c4762a1bSJed Brown if (k > 0) { 423c4762a1bSJed Brown if (j > 0) { 424*9371c9d4SSatish Balay if (i > 0) { 425*9371c9d4SSatish Balay J = global_index(i - 1, j - 1, k - 1, m, n); 426*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 427*9371c9d4SSatish Balay } 428*9371c9d4SSatish Balay J = global_index(i, j - 1, k - 1, m, n); 429*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 430*9371c9d4SSatish Balay if (i < m - 1) { 431*9371c9d4SSatish Balay J = global_index(i + 1, j - 1, k - 1, m, n); 432*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 433*9371c9d4SSatish Balay } 434c4762a1bSJed Brown } 435c4762a1bSJed Brown { 436*9371c9d4SSatish Balay if (i > 0) { 437*9371c9d4SSatish Balay J = global_index(i - 1, j, k - 1, m, n); 438*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 439*9371c9d4SSatish Balay } 440*9371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n); 441*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 442*9371c9d4SSatish Balay if (i < m - 1) { 443*9371c9d4SSatish Balay J = global_index(i + 1, j, k - 1, m, n); 444*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 445*9371c9d4SSatish Balay } 446c4762a1bSJed Brown } 447c4762a1bSJed Brown if (j < n - 1) { 448*9371c9d4SSatish Balay if (i > 0) { 449*9371c9d4SSatish Balay J = global_index(i - 1, j + 1, k - 1, m, n); 450*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 451*9371c9d4SSatish Balay } 452*9371c9d4SSatish Balay J = global_index(i, j + 1, k - 1, m, n); 453*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 454*9371c9d4SSatish Balay if (i < m - 1) { 455*9371c9d4SSatish Balay J = global_index(i + 1, j + 1, k - 1, m, n); 456*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 457*9371c9d4SSatish Balay } 458c4762a1bSJed Brown } 459c4762a1bSJed Brown } 460c4762a1bSJed Brown { 461c4762a1bSJed Brown if (j > 0) { 462*9371c9d4SSatish Balay if (i > 0) { 463*9371c9d4SSatish Balay J = global_index(i - 1, j - 1, k, m, n); 464*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 465*9371c9d4SSatish Balay } 466*9371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n); 467*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 468*9371c9d4SSatish Balay if (i < m - 1) { 469*9371c9d4SSatish Balay J = global_index(i + 1, j - 1, k, m, n); 470*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 471*9371c9d4SSatish Balay } 472c4762a1bSJed Brown } 473c4762a1bSJed Brown { 474*9371c9d4SSatish Balay if (i > 0) { 475*9371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n); 476*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 477*9371c9d4SSatish Balay } 478*9371c9d4SSatish Balay J = global_index(i, j, k, m, n); 479*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 480*9371c9d4SSatish Balay if (i < m - 1) { 481*9371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n); 482*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 483*9371c9d4SSatish Balay } 484c4762a1bSJed Brown } 485c4762a1bSJed Brown if (j < n - 1) { 486*9371c9d4SSatish Balay if (i > 0) { 487*9371c9d4SSatish Balay J = global_index(i - 1, j + 1, k, m, n); 488*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 489*9371c9d4SSatish Balay } 490*9371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n); 491*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 492*9371c9d4SSatish Balay if (i < m - 1) { 493*9371c9d4SSatish Balay J = global_index(i + 1, j + 1, k, m, n); 494*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 495*9371c9d4SSatish Balay } 496c4762a1bSJed Brown } 497c4762a1bSJed Brown } 498c4762a1bSJed Brown if (k < o - 1) { 499c4762a1bSJed Brown if (j > 0) { 500*9371c9d4SSatish Balay if (i > 0) { 501*9371c9d4SSatish Balay J = global_index(i - 1, j - 1, k + 1, m, n); 502*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 503*9371c9d4SSatish Balay } 504*9371c9d4SSatish Balay J = global_index(i, j - 1, k + 1, m, n); 505*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 506*9371c9d4SSatish Balay if (i < m - 1) { 507*9371c9d4SSatish Balay J = global_index(i + 1, j - 1, k + 1, m, n); 508*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 509*9371c9d4SSatish Balay } 510c4762a1bSJed Brown } 511c4762a1bSJed Brown { 512*9371c9d4SSatish Balay if (i > 0) { 513*9371c9d4SSatish Balay J = global_index(i - 1, j, k + 1, m, n); 514*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 515*9371c9d4SSatish Balay } 516*9371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n); 517*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 518*9371c9d4SSatish Balay if (i < m - 1) { 519*9371c9d4SSatish Balay J = global_index(i + 1, j, k + 1, m, n); 520*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 521*9371c9d4SSatish Balay } 522c4762a1bSJed Brown } 523c4762a1bSJed Brown if (j < n - 1) { 524*9371c9d4SSatish Balay if (i > 0) { 525*9371c9d4SSatish Balay J = global_index(i - 1, j + 1, k + 1, m, n); 526*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 527*9371c9d4SSatish Balay } 528*9371c9d4SSatish Balay J = global_index(i, j + 1, k + 1, m, n); 529*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 530*9371c9d4SSatish Balay if (i < m - 1) { 531*9371c9d4SSatish Balay J = global_index(i + 1, j + 1, k + 1, m, n); 532*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 533c4762a1bSJed Brown } 534c4762a1bSJed Brown } 535*9371c9d4SSatish Balay } 536*9371c9d4SSatish Balay v = 26.0; 537*9371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 538c4762a1bSJed Brown } 539c4762a1bSJed Brown } 5409566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 5419566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 542c4762a1bSJed Brown 543c4762a1bSJed Brown /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ 5449566063dSJacob Faibussowitsch PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 545c4762a1bSJed Brown 5469566063dSJacob Faibussowitsch PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage)); 547c4762a1bSJed Brown 548c4762a1bSJed Brown /* Test C = A*B */ 5499566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(fullMatMatMultStage)); 5509566063dSJacob Faibussowitsch PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); 551c4762a1bSJed Brown 552c4762a1bSJed Brown /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ 5539566063dSJacob Faibussowitsch PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP)); 5549566063dSJacob Faibussowitsch PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy)); 5559566063dSJacob Faibussowitsch PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP_squared)); 556c4762a1bSJed Brown 5579566063dSJacob Faibussowitsch PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD)); 5589566063dSJacob Faibussowitsch PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD)); 559c4762a1bSJed Brown 5609566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP_squared)); 5619566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP_copy)); 5629566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP)); 5639566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C)); 5649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 5659566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 5669566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 567b122ec5aSJacob Faibussowitsch return 0; 568c4762a1bSJed Brown } 569c4762a1bSJed Brown 570c4762a1bSJed Brown /*TEST 571c4762a1bSJed Brown 572c4762a1bSJed Brown test: 573c4762a1bSJed Brown suffix: 1 574c4762a1bSJed Brown nsize: 1 575c20d7725SJed Brown args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted 576c4762a1bSJed Brown 577c4762a1bSJed Brown test: 578c4762a1bSJed Brown suffix: 2 579c4762a1bSJed Brown nsize: 1 580c4762a1bSJed Brown args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge 581c4762a1bSJed Brown 582c4762a1bSJed Brown test: 583c4762a1bSJed Brown suffix: 3 584c4762a1bSJed Brown nsize: 4 585c4762a1bSJed Brown args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi 586c4762a1bSJed Brown 587c4762a1bSJed Brown TEST*/ 588