1c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscmat.h> 4c4762a1bSJed Brown 5c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */ 6c4762a1bSJed Brown int global_index(PetscInt i,PetscInt j,PetscInt k, PetscInt m, PetscInt n) { return i + j * m + k * m * n; } 7c4762a1bSJed Brown 8c4762a1bSJed Brown int main(int argc,char **argv) 9c4762a1bSJed Brown { 10c4762a1bSJed Brown Mat A,B,C,PtAP,PtAP_copy,PtAP_squared; 11c4762a1bSJed Brown PetscInt i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1; 12c4762a1bSJed Brown PetscScalar v; 13c4762a1bSJed Brown PetscErrorCode ierr; 14c4762a1bSJed Brown PetscBool equal=PETSC_FALSE,mat_view=PETSC_FALSE; 15c4762a1bSJed Brown char stencil[PETSC_MAX_PATH_LEN]; 16c4762a1bSJed Brown #if defined(PETSC_USE_LOG) 17c4762a1bSJed Brown PetscLogStage fullMatMatMultStage; 18c4762a1bSJed Brown #endif 19c4762a1bSJed Brown 20c4762a1bSJed Brown ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 21*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL)); 22*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL)); 23*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL)); 24*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view)); 25*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetString(NULL,NULL,"-stencil",stencil,sizeof(stencil),NULL)); 26c4762a1bSJed Brown 27c4762a1bSJed Brown /* Create a aij matrix A */ 28c4762a1bSJed Brown M = N = m*n*o; 29*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(PETSC_COMM_WORLD,&A)); 30*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N)); 31*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(A,MATAIJ)); 32*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetFromOptions(A)); 33c4762a1bSJed Brown 34c4762a1bSJed Brown /* Consistency checks */ 352c71b3e2SJacob Faibussowitsch PetscCheckFalse(o < 1 || m < 1 || n < 1,PETSC_COMM_WORLD,PETSC_ERR_USER,"Dimensions need to be larger than zero!"); 36c4762a1bSJed Brown 37c4762a1bSJed Brown /************ 2D stencils ***************/ 38*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "2d5point", &equal)); 39c4762a1bSJed Brown if (equal) { /* 5-point stencil, 2D */ 40*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,5,NULL,5,NULL)); 41*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,5,NULL)); 42*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 43c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 44c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 45*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 46*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 47*5f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 48*5f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 49*5f80ce2aSJacob Faibussowitsch v = 4.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 50c4762a1bSJed Brown } 51c4762a1bSJed Brown } 52*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "2d9point", &equal)); 53c4762a1bSJed Brown if (equal) { /* 9-point stencil, 2D */ 54*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,9,NULL,9,NULL)); 55*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,9,NULL)); 56*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 57c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 58c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 59*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j, k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 60*5f80ce2aSJacob Faibussowitsch if (i>0 && j>0) {J = global_index(i-1,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 61*5f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i, j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 62*5f80ce2aSJacob Faibussowitsch if (i<m-1 && j>0) {J = global_index(i+1,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 63*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j, k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 64*5f80ce2aSJacob Faibussowitsch if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 65*5f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i, j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 66*5f80ce2aSJacob Faibussowitsch if (i>0 && j<n-1) {J = global_index(i-1,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 67*5f80ce2aSJacob Faibussowitsch v = 8.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 68c4762a1bSJed Brown } 69c4762a1bSJed Brown } 70*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "2d9point2", &equal)); 71c4762a1bSJed Brown if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ 72*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,9,NULL,9,NULL)); 73*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,9,NULL)); 74*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 75c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 76c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 77*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 78*5f80ce2aSJacob Faibussowitsch if (i>1) {J = global_index(i-2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 79*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 80*5f80ce2aSJacob Faibussowitsch if (i<m-2) {J = global_index(i+2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 81*5f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 82*5f80ce2aSJacob Faibussowitsch if (j>1) {J = global_index(i,j-2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 83*5f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 84*5f80ce2aSJacob Faibussowitsch if (j<n-2) {J = global_index(i,j+2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 85*5f80ce2aSJacob Faibussowitsch v = 8.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 86c4762a1bSJed Brown } 87c4762a1bSJed Brown } 88*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "2d13point", &equal)); 89c4762a1bSJed Brown if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ 90*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,13,NULL,13,NULL)); 91*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,13,NULL)); 92*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 93c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 94c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 95*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 96*5f80ce2aSJacob Faibussowitsch if (i>1) {J = global_index(i-2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 97*5f80ce2aSJacob Faibussowitsch if (i>2) {J = global_index(i-3,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 98*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 99*5f80ce2aSJacob Faibussowitsch if (i<m-2) {J = global_index(i+2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 100*5f80ce2aSJacob Faibussowitsch if (i<m-3) {J = global_index(i+3,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 101*5f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 102*5f80ce2aSJacob Faibussowitsch if (j>1) {J = global_index(i,j-2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 103*5f80ce2aSJacob Faibussowitsch if (j>2) {J = global_index(i,j-3,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 104*5f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 105*5f80ce2aSJacob Faibussowitsch if (j<n-2) {J = global_index(i,j+2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 106*5f80ce2aSJacob Faibussowitsch if (j<n-3) {J = global_index(i,j+3,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 107*5f80ce2aSJacob Faibussowitsch v = 12.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 108c4762a1bSJed Brown } 109c4762a1bSJed Brown } 110c4762a1bSJed Brown /************ 3D stencils ***************/ 111*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "3d7point", &equal)); 112c4762a1bSJed Brown if (equal) { /* 7-point stencil, 3D */ 113*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,7,NULL,7,NULL)); 114*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,7,NULL)); 115*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 116c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 117c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 118*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 119*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 120*5f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 121*5f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 122*5f80ce2aSJacob Faibussowitsch if (k>0) {J = global_index(i,j,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 123*5f80ce2aSJacob Faibussowitsch if (k<o-1) {J = global_index(i,j,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 124*5f80ce2aSJacob Faibussowitsch v = 6.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 125c4762a1bSJed Brown } 126c4762a1bSJed Brown } 127*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "3d13point", &equal)); 128c4762a1bSJed Brown if (equal) { /* 13-point stencil, 3D */ 129*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,13,NULL,13,NULL)); 130*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,13,NULL)); 131*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 132c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 133c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 134*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 135*5f80ce2aSJacob Faibussowitsch if (i>1) {J = global_index(i-2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 136*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 137*5f80ce2aSJacob Faibussowitsch if (i<m-2) {J = global_index(i+2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 138*5f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 139*5f80ce2aSJacob Faibussowitsch if (j>1) {J = global_index(i,j-2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 140*5f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 141*5f80ce2aSJacob Faibussowitsch if (j<n-2) {J = global_index(i,j+2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 142*5f80ce2aSJacob Faibussowitsch if (k>0) {J = global_index(i,j,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 143*5f80ce2aSJacob Faibussowitsch if (k>1) {J = global_index(i,j,k-2,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 144*5f80ce2aSJacob Faibussowitsch if (k<o-1) {J = global_index(i,j,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 145*5f80ce2aSJacob Faibussowitsch if (k<o-2) {J = global_index(i,j,k+2,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 146*5f80ce2aSJacob Faibussowitsch v = 12.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 147c4762a1bSJed Brown } 148c4762a1bSJed Brown } 149*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "3d19point", &equal)); 150c4762a1bSJed Brown if (equal) { /* 19-point stencil, 3D */ 151*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,19,NULL,19,NULL)); 152*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,19,NULL)); 153*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 154c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 155c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 156c4762a1bSJed Brown /* one hop */ 157*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 158*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 159*5f80ce2aSJacob Faibussowitsch if (j>0) {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 160*5f80ce2aSJacob Faibussowitsch if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 161*5f80ce2aSJacob Faibussowitsch if (k>0) {J = global_index(i,j,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 162*5f80ce2aSJacob Faibussowitsch if (k<o-1) {J = global_index(i,j,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 163c4762a1bSJed Brown /* two hops */ 164*5f80ce2aSJacob Faibussowitsch if (i>0 && j>0) {J = global_index(i-1,j-1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 165*5f80ce2aSJacob Faibussowitsch if (i>0 && k>0) {J = global_index(i-1,j, k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 166*5f80ce2aSJacob Faibussowitsch if (i>0 && j<n-1) {J = global_index(i-1,j+1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 167*5f80ce2aSJacob Faibussowitsch if (i>0 && k<o-1) {J = global_index(i-1,j, k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 168*5f80ce2aSJacob Faibussowitsch if (i<m-1 && j>0) {J = global_index(i+1,j-1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 169*5f80ce2aSJacob Faibussowitsch if (i<m-1 && k>0) {J = global_index(i+1,j, k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 170*5f80ce2aSJacob Faibussowitsch if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 171*5f80ce2aSJacob Faibussowitsch if (i<m-1 && k<o-1) {J = global_index(i+1,j, k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 172*5f80ce2aSJacob Faibussowitsch if (j>0 && k>0) {J = global_index(i, j-1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 173*5f80ce2aSJacob Faibussowitsch if (j>0 && k<o-1) {J = global_index(i, j-1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 174*5f80ce2aSJacob Faibussowitsch if (j<n-1 && k>0) {J = global_index(i, j+1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 175*5f80ce2aSJacob Faibussowitsch if (j<n-1 && k<o-1) {J = global_index(i, j+1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 176*5f80ce2aSJacob Faibussowitsch v = 18.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 177c4762a1bSJed Brown } 178c4762a1bSJed Brown } 179*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(stencil, "3d27point", &equal)); 180c4762a1bSJed Brown if (equal) { /* 27-point stencil, 3D */ 181*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,27,NULL,27,NULL)); 182*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,27,NULL)); 183*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend)); 184c4762a1bSJed Brown for (Ii=Istart; Ii<Iend; Ii++) { 185c4762a1bSJed Brown v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 186c4762a1bSJed Brown if (k>0) { 187c4762a1bSJed Brown if (j>0) { 188*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j-1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 189*5f80ce2aSJacob Faibussowitsch J = global_index(i, j-1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 190*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 191c4762a1bSJed Brown } 192c4762a1bSJed Brown { 193*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j, k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 194*5f80ce2aSJacob Faibussowitsch J = global_index(i, j, k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 195*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j, k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 196c4762a1bSJed Brown } 197c4762a1bSJed Brown if (j<n-1) { 198*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j+1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 199*5f80ce2aSJacob Faibussowitsch J = global_index(i, j+1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 200*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 201c4762a1bSJed Brown } 202c4762a1bSJed Brown } 203c4762a1bSJed Brown { 204c4762a1bSJed Brown if (j>0) { 205*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j-1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 206*5f80ce2aSJacob Faibussowitsch J = global_index(i, j-1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 207*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j-1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 208c4762a1bSJed Brown } 209c4762a1bSJed Brown { 210*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j, k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 211*5f80ce2aSJacob Faibussowitsch J = global_index(i, j, k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 212*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j, k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 213c4762a1bSJed Brown } 214c4762a1bSJed Brown if (j<n-1) { 215*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j+1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 216*5f80ce2aSJacob Faibussowitsch J = global_index(i, j+1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 217*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j+1,k ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 218c4762a1bSJed Brown } 219c4762a1bSJed Brown } 220c4762a1bSJed Brown if (k<o-1) { 221c4762a1bSJed Brown if (j>0) { 222*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j-1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 223*5f80ce2aSJacob Faibussowitsch J = global_index(i, j-1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 224*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 225c4762a1bSJed Brown } 226c4762a1bSJed Brown { 227*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j, k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 228*5f80ce2aSJacob Faibussowitsch J = global_index(i, j, k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 229*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j, k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 230c4762a1bSJed Brown } 231c4762a1bSJed Brown if (j<n-1) { 232*5f80ce2aSJacob Faibussowitsch if (i>0) {J = global_index(i-1,j+1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 233*5f80ce2aSJacob Faibussowitsch J = global_index(i, j+1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES)); 234*5f80ce2aSJacob Faibussowitsch if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 235c4762a1bSJed Brown } 236c4762a1bSJed Brown } 237*5f80ce2aSJacob Faibussowitsch v = 26.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 238c4762a1bSJed Brown } 239c4762a1bSJed Brown } 240*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 241*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 242c4762a1bSJed Brown 243c4762a1bSJed Brown /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ 244*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(A,MAT_COPY_VALUES,&B)); 245c4762a1bSJed Brown 246*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage)); 247c4762a1bSJed Brown 248c4762a1bSJed Brown /* Test C = A*B */ 249*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogStagePush(fullMatMatMultStage)); 250*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C)); 251c4762a1bSJed Brown 252c4762a1bSJed Brown /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ 253*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP)); 254*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy)); 255*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared)); 256c4762a1bSJed Brown 257*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(C,PETSC_VIEWER_STDOUT_WORLD)); 258*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD)); 259c4762a1bSJed Brown 260*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&PtAP_squared)); 261*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&PtAP_copy)); 262*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&PtAP)); 263*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&C)); 264*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&B)); 265*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A)); 266c4762a1bSJed Brown ierr = PetscFinalize(); 267c4762a1bSJed Brown return ierr; 268c4762a1bSJed Brown } 269c4762a1bSJed Brown 270c4762a1bSJed Brown /*TEST 271c4762a1bSJed Brown 272c4762a1bSJed Brown test: 273c4762a1bSJed Brown suffix: 1 274c4762a1bSJed Brown nsize: 1 275c20d7725SJed Brown args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted 276c4762a1bSJed Brown 277c4762a1bSJed Brown test: 278c4762a1bSJed Brown suffix: 2 279c4762a1bSJed Brown nsize: 1 280c4762a1bSJed Brown args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge 281c4762a1bSJed Brown 282c4762a1bSJed Brown test: 283c4762a1bSJed Brown suffix: 3 284c4762a1bSJed Brown nsize: 4 285c4762a1bSJed Brown args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi 286c4762a1bSJed Brown 287c4762a1bSJed Brown TEST*/ 288