1*c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n"; 2*c4762a1bSJed Brown 3*c4762a1bSJed Brown #include <petscmat.h> 4*c4762a1bSJed Brown 5*c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */ 6*c4762a1bSJed Brown int global_index(PetscInt i,PetscInt j,PetscInt k, PetscInt m, PetscInt n) { return i + j * m + k * m * n; } 7*c4762a1bSJed Brown 8*c4762a1bSJed Brown int main(int argc,char **argv) 9*c4762a1bSJed Brown { 10*c4762a1bSJed Brown Mat A,B,C,PtAP,PtAP_copy,PtAP_squared; 11*c4762a1bSJed Brown PetscInt i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1; 12*c4762a1bSJed Brown PetscScalar v; 13*c4762a1bSJed Brown PetscErrorCode ierr; 14*c4762a1bSJed Brown PetscBool equal=PETSC_FALSE,mat_view=PETSC_FALSE; 15*c4762a1bSJed Brown char stencil[PETSC_MAX_PATH_LEN]; 16*c4762a1bSJed Brown #if defined(PETSC_USE_LOG) 17*c4762a1bSJed Brown PetscLogStage fullMatMatMultStage; 18*c4762a1bSJed Brown #endif 19*c4762a1bSJed Brown 20*c4762a1bSJed Brown ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 21*c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); 22*c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); 23*c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL);CHKERRQ(ierr); 24*c4762a1bSJed Brown ierr = PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view);CHKERRQ(ierr); 25*c4762a1bSJed Brown ierr = PetscOptionsGetString(NULL,NULL,"-stencil",stencil,PETSC_MAX_PATH_LEN,NULL);CHKERRQ(ierr); 26*c4762a1bSJed Brown 27*c4762a1bSJed Brown /* Create a aij matrix A */ 28*c4762a1bSJed Brown M = N = m*n*o; 29*c4762a1bSJed Brown ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); 30*c4762a1bSJed Brown ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 31*c4762a1bSJed Brown ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr); 32*c4762a1bSJed Brown ierr = MatSetFromOptions(A);CHKERRQ(ierr); 33*c4762a1bSJed Brown 34*c4762a1bSJed Brown /* Consistency checks */ 35*c4762a1bSJed Brown if (o < 1 || m < 1 || n < 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"Dimensions need to be larger than zero!"); 36*c4762a1bSJed Brown 37*c4762a1bSJed Brown /************ 2D stencils ***************/ 38*c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "2d5point", &equal);CHKERRQ(ierr); 39*c4762a1bSJed Brown if (equal) { /* 5-point stencil, 2D */ 40*c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr); 41*c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr); 42*c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 43*c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 44*c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 45*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 46*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 47*c4762a1bSJed Brown if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 48*c4762a1bSJed Brown if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 49*c4762a1bSJed Brown v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 50*c4762a1bSJed Brown } 51*c4762a1bSJed Brown } 52*c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "2d9point", &equal);CHKERRQ(ierr); 53*c4762a1bSJed Brown if (equal) { /* 9-point stencil, 2D */ 54*c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr); 55*c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr); 56*c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 57*c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 58*c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 59*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j, k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 60*c4762a1bSJed Brown if (i>0 && j>0) {J = global_index(i-1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 61*c4762a1bSJed Brown if ( j>0) {J = global_index(i, j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 62*c4762a1bSJed Brown if (i<m-1 && j>0) {J = global_index(i+1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 63*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j, k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 64*c4762a1bSJed Brown if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 65*c4762a1bSJed Brown if (j<n-1) {J = global_index(i, j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 66*c4762a1bSJed Brown if (i>0 && j<n-1) {J = global_index(i-1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 67*c4762a1bSJed Brown v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 68*c4762a1bSJed Brown } 69*c4762a1bSJed Brown } 70*c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "2d9point2", &equal);CHKERRQ(ierr); 71*c4762a1bSJed Brown if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ 72*c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr); 73*c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr); 74*c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 75*c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 76*c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 77*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 78*c4762a1bSJed Brown if (i>1) {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 79*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 80*c4762a1bSJed Brown if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 81*c4762a1bSJed Brown if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 82*c4762a1bSJed Brown if (j>1) {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 83*c4762a1bSJed Brown if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 84*c4762a1bSJed Brown if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 85*c4762a1bSJed Brown v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 86*c4762a1bSJed Brown } 87*c4762a1bSJed Brown } 88*c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "2d13point", &equal);CHKERRQ(ierr); 89*c4762a1bSJed Brown if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ 90*c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr); 91*c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr); 92*c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 93*c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 94*c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 95*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 96*c4762a1bSJed Brown if (i>1) {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 97*c4762a1bSJed Brown if (i>2) {J = global_index(i-3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 98*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 99*c4762a1bSJed Brown if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 100*c4762a1bSJed Brown if (i<m-3) {J = global_index(i+3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 101*c4762a1bSJed Brown if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 102*c4762a1bSJed Brown if (j>1) {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 103*c4762a1bSJed Brown if (j>2) {J = global_index(i,j-3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 104*c4762a1bSJed Brown if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 105*c4762a1bSJed Brown if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 106*c4762a1bSJed Brown if (j<n-3) {J = global_index(i,j+3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 107*c4762a1bSJed Brown v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 108*c4762a1bSJed Brown } 109*c4762a1bSJed Brown } 110*c4762a1bSJed Brown /************ 3D stencils ***************/ 111*c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "3d7point", &equal);CHKERRQ(ierr); 112*c4762a1bSJed Brown if (equal) { /* 7-point stencil, 3D */ 113*c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,7,NULL,7,NULL);CHKERRQ(ierr); 114*c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,7,NULL);CHKERRQ(ierr); 115*c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 116*c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 117*c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 118*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 119*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 120*c4762a1bSJed Brown if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 121*c4762a1bSJed Brown if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 122*c4762a1bSJed Brown if (k>0) {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 123*c4762a1bSJed Brown if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 124*c4762a1bSJed Brown v = 6.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 125*c4762a1bSJed Brown } 126*c4762a1bSJed Brown } 127*c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "3d13point", &equal);CHKERRQ(ierr); 128*c4762a1bSJed Brown if (equal) { /* 13-point stencil, 3D */ 129*c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr); 130*c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr); 131*c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 132*c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 133*c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 134*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 135*c4762a1bSJed Brown if (i>1) {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 136*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 137*c4762a1bSJed Brown if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 138*c4762a1bSJed Brown if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 139*c4762a1bSJed Brown if (j>1) {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 140*c4762a1bSJed Brown if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 141*c4762a1bSJed Brown if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 142*c4762a1bSJed Brown if (k>0) {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 143*c4762a1bSJed Brown if (k>1) {J = global_index(i,j,k-2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 144*c4762a1bSJed Brown if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 145*c4762a1bSJed Brown if (k<o-2) {J = global_index(i,j,k+2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 146*c4762a1bSJed Brown v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 147*c4762a1bSJed Brown } 148*c4762a1bSJed Brown } 149*c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "3d19point", &equal);CHKERRQ(ierr); 150*c4762a1bSJed Brown if (equal) { /* 19-point stencil, 3D */ 151*c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,19,NULL,19,NULL);CHKERRQ(ierr); 152*c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,19,NULL);CHKERRQ(ierr); 153*c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 154*c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 155*c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 156*c4762a1bSJed Brown /* one hop */ 157*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 158*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 159*c4762a1bSJed Brown if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 160*c4762a1bSJed Brown if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 161*c4762a1bSJed Brown if (k>0) {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 162*c4762a1bSJed Brown if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 163*c4762a1bSJed Brown /* two hops */ 164*c4762a1bSJed Brown if (i>0 && j>0) {J = global_index(i-1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 165*c4762a1bSJed Brown if (i>0 && k>0) {J = global_index(i-1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 166*c4762a1bSJed Brown if (i>0 && j<n-1) {J = global_index(i-1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 167*c4762a1bSJed Brown if (i>0 && k<o-1) {J = global_index(i-1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 168*c4762a1bSJed Brown if (i<m-1 && j>0) {J = global_index(i+1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 169*c4762a1bSJed Brown if (i<m-1 && k>0) {J = global_index(i+1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 170*c4762a1bSJed Brown if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 171*c4762a1bSJed Brown if (i<m-1 && k<o-1) {J = global_index(i+1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 172*c4762a1bSJed Brown if (j>0 && k>0) {J = global_index(i, j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 173*c4762a1bSJed Brown if (j>0 && k<o-1) {J = global_index(i, j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 174*c4762a1bSJed Brown if (j<n-1 && k>0) {J = global_index(i, j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 175*c4762a1bSJed Brown if (j<n-1 && k<o-1) {J = global_index(i, j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 176*c4762a1bSJed Brown v = 18.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 177*c4762a1bSJed Brown } 178*c4762a1bSJed Brown } 179*c4762a1bSJed Brown ierr = PetscStrcmp(stencil, "3d27point", &equal);CHKERRQ(ierr); 180*c4762a1bSJed Brown if (equal) { /* 27-point stencil, 3D */ 181*c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,27,NULL,27,NULL);CHKERRQ(ierr); 182*c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,27,NULL);CHKERRQ(ierr); 183*c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 184*c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 185*c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 186*c4762a1bSJed Brown if (k>0) { 187*c4762a1bSJed Brown if (j>0) { 188*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 189*c4762a1bSJed Brown J = global_index(i, j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 190*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 191*c4762a1bSJed Brown } 192*c4762a1bSJed Brown { 193*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 194*c4762a1bSJed Brown J = global_index(i, j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 195*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 196*c4762a1bSJed Brown } 197*c4762a1bSJed Brown if (j<n-1) { 198*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 199*c4762a1bSJed Brown J = global_index(i, j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 200*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 201*c4762a1bSJed Brown } 202*c4762a1bSJed Brown } 203*c4762a1bSJed Brown { 204*c4762a1bSJed Brown if (j>0) { 205*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 206*c4762a1bSJed Brown J = global_index(i, j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 207*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 208*c4762a1bSJed Brown } 209*c4762a1bSJed Brown { 210*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j, k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 211*c4762a1bSJed Brown J = global_index(i, j, k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 212*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j, k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 213*c4762a1bSJed Brown } 214*c4762a1bSJed Brown if (j<n-1) { 215*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 216*c4762a1bSJed Brown J = global_index(i, j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 217*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 218*c4762a1bSJed Brown } 219*c4762a1bSJed Brown } 220*c4762a1bSJed Brown if (k<o-1) { 221*c4762a1bSJed Brown if (j>0) { 222*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 223*c4762a1bSJed Brown J = global_index(i, j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 224*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 225*c4762a1bSJed Brown } 226*c4762a1bSJed Brown { 227*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 228*c4762a1bSJed Brown J = global_index(i, j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 229*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 230*c4762a1bSJed Brown } 231*c4762a1bSJed Brown if (j<n-1) { 232*c4762a1bSJed Brown if (i>0) {J = global_index(i-1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 233*c4762a1bSJed Brown J = global_index(i, j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 234*c4762a1bSJed Brown if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 235*c4762a1bSJed Brown } 236*c4762a1bSJed Brown } 237*c4762a1bSJed Brown v = 26.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 238*c4762a1bSJed Brown } 239*c4762a1bSJed Brown } 240*c4762a1bSJed Brown ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 241*c4762a1bSJed Brown ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 242*c4762a1bSJed Brown 243*c4762a1bSJed Brown /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ 244*c4762a1bSJed Brown ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 245*c4762a1bSJed Brown 246*c4762a1bSJed Brown ierr = PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage);CHKERRQ(ierr); 247*c4762a1bSJed Brown 248*c4762a1bSJed Brown /* Test C = A*B */ 249*c4762a1bSJed Brown ierr = PetscLogStagePush(fullMatMatMultStage);CHKERRQ(ierr); 250*c4762a1bSJed Brown ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr); 251*c4762a1bSJed Brown 252*c4762a1bSJed Brown /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ 253*c4762a1bSJed Brown ierr = MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP);CHKERRQ(ierr); 254*c4762a1bSJed Brown ierr = MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy);CHKERRQ(ierr); 255*c4762a1bSJed Brown ierr = MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared);CHKERRQ(ierr); 256*c4762a1bSJed Brown 257*c4762a1bSJed Brown ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 258*c4762a1bSJed Brown ierr = MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 259*c4762a1bSJed Brown 260*c4762a1bSJed Brown 261*c4762a1bSJed Brown ierr = MatDestroy(&PtAP_squared);CHKERRQ(ierr); 262*c4762a1bSJed Brown ierr = MatDestroy(&PtAP_copy);CHKERRQ(ierr); 263*c4762a1bSJed Brown ierr = MatDestroy(&PtAP);CHKERRQ(ierr); 264*c4762a1bSJed Brown ierr = MatDestroy(&C);CHKERRQ(ierr); 265*c4762a1bSJed Brown ierr = MatDestroy(&B);CHKERRQ(ierr); 266*c4762a1bSJed Brown ierr = MatDestroy(&A);CHKERRQ(ierr); 267*c4762a1bSJed Brown ierr = PetscFinalize(); 268*c4762a1bSJed Brown return ierr; 269*c4762a1bSJed Brown } 270*c4762a1bSJed Brown 271*c4762a1bSJed Brown 272*c4762a1bSJed Brown /*TEST 273*c4762a1bSJed Brown 274*c4762a1bSJed Brown test: 275*c4762a1bSJed Brown suffix: 1 276*c4762a1bSJed Brown nsize: 1 277*c4762a1bSJed Brown args: -m 8 -n 8 -stencil 2d5point -matmatmult_via combined 278*c4762a1bSJed Brown 279*c4762a1bSJed Brown test: 280*c4762a1bSJed Brown suffix: 2 281*c4762a1bSJed Brown nsize: 1 282*c4762a1bSJed Brown args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge 283*c4762a1bSJed Brown 284*c4762a1bSJed Brown test: 285*c4762a1bSJed Brown suffix: 3 286*c4762a1bSJed Brown nsize: 4 287*c4762a1bSJed Brown args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi 288*c4762a1bSJed Brown 289*c4762a1bSJed Brown 290*c4762a1bSJed Brown 291*c4762a1bSJed Brown TEST*/ 292