xref: /petsc/src/mat/tests/ex226.c (revision b122ec5aa1bd4469eb4e0673542fb7de3f411254)
1c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
2c4762a1bSJed Brown 
3c4762a1bSJed Brown #include <petscmat.h>
4c4762a1bSJed Brown 
5c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
6c4762a1bSJed Brown int global_index(PetscInt i,PetscInt j,PetscInt k, PetscInt m, PetscInt n) { return i + j * m + k * m * n; }
7c4762a1bSJed Brown 
8c4762a1bSJed Brown int main(int argc,char **argv)
9c4762a1bSJed Brown {
10c4762a1bSJed Brown   Mat            A,B,C,PtAP,PtAP_copy,PtAP_squared;
11c4762a1bSJed Brown   PetscInt       i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1;
12c4762a1bSJed Brown   PetscScalar    v;
13c4762a1bSJed Brown   PetscBool      equal=PETSC_FALSE,mat_view=PETSC_FALSE;
14c4762a1bSJed Brown   char           stencil[PETSC_MAX_PATH_LEN];
15c4762a1bSJed Brown #if defined(PETSC_USE_LOG)
16c4762a1bSJed Brown   PetscLogStage  fullMatMatMultStage;
17c4762a1bSJed Brown #endif
18c4762a1bSJed Brown 
19*b122ec5aSJacob Faibussowitsch   CHKERRQ(PetscInitialize(&argc,&argv,(char*)0,help));
205f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL));
215f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL));
225f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL));
235f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view));
245f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetString(NULL,NULL,"-stencil",stencil,sizeof(stencil),NULL));
25c4762a1bSJed Brown 
26c4762a1bSJed Brown   /* Create a aij matrix A */
27c4762a1bSJed Brown   M    = N = m*n*o;
285f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCreate(PETSC_COMM_WORLD,&A));
295f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N));
305f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetType(A,MATAIJ));
315f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetFromOptions(A));
32c4762a1bSJed Brown 
33c4762a1bSJed Brown   /* Consistency checks */
342c71b3e2SJacob Faibussowitsch   PetscCheckFalse(o < 1 || m < 1 || n < 1,PETSC_COMM_WORLD,PETSC_ERR_USER,"Dimensions need to be larger than zero!");
35c4762a1bSJed Brown 
36c4762a1bSJed Brown   /************ 2D stencils ***************/
375f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrcmp(stencil, "2d5point", &equal));
38c4762a1bSJed Brown   if (equal) {   /* 5-point stencil, 2D */
395f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMPIAIJSetPreallocation(A,5,NULL,5,NULL));
405f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSeqAIJSetPreallocation(A,5,NULL));
415f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend));
42c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
43c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
445f80ce2aSJacob Faibussowitsch       if (i>0)   {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
455f80ce2aSJacob Faibussowitsch       if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
465f80ce2aSJacob Faibussowitsch       if (j>0)   {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
475f80ce2aSJacob Faibussowitsch       if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
485f80ce2aSJacob Faibussowitsch       v = 4.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES));
49c4762a1bSJed Brown     }
50c4762a1bSJed Brown   }
515f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrcmp(stencil, "2d9point", &equal));
52c4762a1bSJed Brown   if (equal) {      /* 9-point stencil, 2D */
535f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMPIAIJSetPreallocation(A,9,NULL,9,NULL));
545f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSeqAIJSetPreallocation(A,9,NULL));
555f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend));
56c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
57c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
585f80ce2aSJacob Faibussowitsch       if (i>0)            {J = global_index(i-1,j,  k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
595f80ce2aSJacob Faibussowitsch       if (i>0 && j>0)   {J = global_index(i-1,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
605f80ce2aSJacob Faibussowitsch       if (j>0)            {J = global_index(i,  j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
615f80ce2aSJacob Faibussowitsch       if (i<m-1 && j>0)   {J = global_index(i+1,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
625f80ce2aSJacob Faibussowitsch       if (i<m-1)          {J = global_index(i+1,j,  k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
635f80ce2aSJacob Faibussowitsch       if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
645f80ce2aSJacob Faibussowitsch       if (j<n-1)          {J = global_index(i,  j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
655f80ce2aSJacob Faibussowitsch       if (i>0 && j<n-1) {J = global_index(i-1,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
665f80ce2aSJacob Faibussowitsch       v = 8.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES));
67c4762a1bSJed Brown     }
68c4762a1bSJed Brown   }
695f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrcmp(stencil, "2d9point2", &equal));
70c4762a1bSJed Brown   if (equal) {      /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
715f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMPIAIJSetPreallocation(A,9,NULL,9,NULL));
725f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSeqAIJSetPreallocation(A,9,NULL));
735f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend));
74c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
75c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
765f80ce2aSJacob Faibussowitsch       if (i>0)   {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
775f80ce2aSJacob Faibussowitsch       if (i>1)   {J = global_index(i-2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
785f80ce2aSJacob Faibussowitsch       if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
795f80ce2aSJacob Faibussowitsch       if (i<m-2) {J = global_index(i+2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
805f80ce2aSJacob Faibussowitsch       if (j>0)   {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
815f80ce2aSJacob Faibussowitsch       if (j>1)   {J = global_index(i,j-2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
825f80ce2aSJacob Faibussowitsch       if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
835f80ce2aSJacob Faibussowitsch       if (j<n-2) {J = global_index(i,j+2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
845f80ce2aSJacob Faibussowitsch       v = 8.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES));
85c4762a1bSJed Brown     }
86c4762a1bSJed Brown   }
875f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrcmp(stencil, "2d13point", &equal));
88c4762a1bSJed Brown   if (equal) {      /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
895f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMPIAIJSetPreallocation(A,13,NULL,13,NULL));
905f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSeqAIJSetPreallocation(A,13,NULL));
915f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend));
92c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
93c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
945f80ce2aSJacob Faibussowitsch       if (i>0)   {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
955f80ce2aSJacob Faibussowitsch       if (i>1)   {J = global_index(i-2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
965f80ce2aSJacob Faibussowitsch       if (i>2)   {J = global_index(i-3,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
975f80ce2aSJacob Faibussowitsch       if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
985f80ce2aSJacob Faibussowitsch       if (i<m-2) {J = global_index(i+2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
995f80ce2aSJacob Faibussowitsch       if (i<m-3) {J = global_index(i+3,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1005f80ce2aSJacob Faibussowitsch       if (j>0)   {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1015f80ce2aSJacob Faibussowitsch       if (j>1)   {J = global_index(i,j-2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1025f80ce2aSJacob Faibussowitsch       if (j>2)   {J = global_index(i,j-3,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1035f80ce2aSJacob Faibussowitsch       if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1045f80ce2aSJacob Faibussowitsch       if (j<n-2) {J = global_index(i,j+2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1055f80ce2aSJacob Faibussowitsch       if (j<n-3) {J = global_index(i,j+3,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1065f80ce2aSJacob Faibussowitsch       v = 12.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES));
107c4762a1bSJed Brown     }
108c4762a1bSJed Brown   }
109c4762a1bSJed Brown   /************ 3D stencils ***************/
1105f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrcmp(stencil, "3d7point", &equal));
111c4762a1bSJed Brown   if (equal) {      /* 7-point stencil, 3D */
1125f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMPIAIJSetPreallocation(A,7,NULL,7,NULL));
1135f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSeqAIJSetPreallocation(A,7,NULL));
1145f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend));
115c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
116c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
1175f80ce2aSJacob Faibussowitsch       if (i>0)   {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1185f80ce2aSJacob Faibussowitsch       if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1195f80ce2aSJacob Faibussowitsch       if (j>0)   {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1205f80ce2aSJacob Faibussowitsch       if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1215f80ce2aSJacob Faibussowitsch       if (k>0)   {J = global_index(i,j,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1225f80ce2aSJacob Faibussowitsch       if (k<o-1) {J = global_index(i,j,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1235f80ce2aSJacob Faibussowitsch       v = 6.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES));
124c4762a1bSJed Brown     }
125c4762a1bSJed Brown   }
1265f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrcmp(stencil, "3d13point", &equal));
127c4762a1bSJed Brown   if (equal) {      /* 13-point stencil, 3D */
1285f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMPIAIJSetPreallocation(A,13,NULL,13,NULL));
1295f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSeqAIJSetPreallocation(A,13,NULL));
1305f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend));
131c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
132c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
1335f80ce2aSJacob Faibussowitsch       if (i>0)   {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1345f80ce2aSJacob Faibussowitsch       if (i>1)   {J = global_index(i-2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1355f80ce2aSJacob Faibussowitsch       if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1365f80ce2aSJacob Faibussowitsch       if (i<m-2) {J = global_index(i+2,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1375f80ce2aSJacob Faibussowitsch       if (j>0)   {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1385f80ce2aSJacob Faibussowitsch       if (j>1)   {J = global_index(i,j-2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1395f80ce2aSJacob Faibussowitsch       if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1405f80ce2aSJacob Faibussowitsch       if (j<n-2) {J = global_index(i,j+2,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1415f80ce2aSJacob Faibussowitsch       if (k>0)   {J = global_index(i,j,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1425f80ce2aSJacob Faibussowitsch       if (k>1)   {J = global_index(i,j,k-2,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1435f80ce2aSJacob Faibussowitsch       if (k<o-1) {J = global_index(i,j,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1445f80ce2aSJacob Faibussowitsch       if (k<o-2) {J = global_index(i,j,k+2,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1455f80ce2aSJacob Faibussowitsch       v = 12.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES));
146c4762a1bSJed Brown     }
147c4762a1bSJed Brown   }
1485f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrcmp(stencil, "3d19point", &equal));
149c4762a1bSJed Brown   if (equal) {      /* 19-point stencil, 3D */
1505f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMPIAIJSetPreallocation(A,19,NULL,19,NULL));
1515f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSeqAIJSetPreallocation(A,19,NULL));
1525f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend));
153c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
154c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
155c4762a1bSJed Brown       /* one hop */
1565f80ce2aSJacob Faibussowitsch       if (i>0)   {J = global_index(i-1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1575f80ce2aSJacob Faibussowitsch       if (i<m-1) {J = global_index(i+1,j,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1585f80ce2aSJacob Faibussowitsch       if (j>0)   {J = global_index(i,j-1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1595f80ce2aSJacob Faibussowitsch       if (j<n-1) {J = global_index(i,j+1,k,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1605f80ce2aSJacob Faibussowitsch       if (k>0)   {J = global_index(i,j,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1615f80ce2aSJacob Faibussowitsch       if (k<o-1) {J = global_index(i,j,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
162c4762a1bSJed Brown       /* two hops */
1635f80ce2aSJacob Faibussowitsch       if (i>0   && j>0)   {J = global_index(i-1,j-1,k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1645f80ce2aSJacob Faibussowitsch       if (i>0   && k>0)   {J = global_index(i-1,j,  k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1655f80ce2aSJacob Faibussowitsch       if (i>0   && j<n-1) {J = global_index(i-1,j+1,k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1665f80ce2aSJacob Faibussowitsch       if (i>0   && k<o-1) {J = global_index(i-1,j,  k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1675f80ce2aSJacob Faibussowitsch       if (i<m-1 && j>0)   {J = global_index(i+1,j-1,k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1685f80ce2aSJacob Faibussowitsch       if (i<m-1 && k>0)   {J = global_index(i+1,j,  k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1695f80ce2aSJacob Faibussowitsch       if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1705f80ce2aSJacob Faibussowitsch       if (i<m-1 && k<o-1) {J = global_index(i+1,j,  k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1715f80ce2aSJacob Faibussowitsch       if (j>0   && k>0)   {J = global_index(i,  j-1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1725f80ce2aSJacob Faibussowitsch       if (j>0   && k<o-1) {J = global_index(i,  j-1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1735f80ce2aSJacob Faibussowitsch       if (j<n-1 && k>0)   {J = global_index(i,  j+1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1745f80ce2aSJacob Faibussowitsch       if (j<n-1 && k<o-1) {J = global_index(i,  j+1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1755f80ce2aSJacob Faibussowitsch       v = 18.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES));
176c4762a1bSJed Brown     }
177c4762a1bSJed Brown   }
1785f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrcmp(stencil, "3d27point", &equal));
179c4762a1bSJed Brown   if (equal) {      /* 27-point stencil, 3D */
1805f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMPIAIJSetPreallocation(A,27,NULL,27,NULL));
1815f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSeqAIJSetPreallocation(A,27,NULL));
1825f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetOwnershipRange(A,&Istart,&Iend));
183c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
184c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
185c4762a1bSJed Brown       if (k>0) {
186c4762a1bSJed Brown         if (j>0) {
1875f80ce2aSJacob Faibussowitsch           if (i>0)   {J = global_index(i-1,j-1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1885f80ce2aSJacob Faibussowitsch                       J = global_index(i,  j-1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));
1895f80ce2aSJacob Faibussowitsch           if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
190c4762a1bSJed Brown         }
191c4762a1bSJed Brown         {
1925f80ce2aSJacob Faibussowitsch           if (i>0)   {J = global_index(i-1,j,  k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1935f80ce2aSJacob Faibussowitsch                       J = global_index(i,  j,  k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));
1945f80ce2aSJacob Faibussowitsch           if (i<m-1) {J = global_index(i+1,j,  k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
195c4762a1bSJed Brown         }
196c4762a1bSJed Brown         if (j<n-1) {
1975f80ce2aSJacob Faibussowitsch           if (i>0)   {J = global_index(i-1,j+1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
1985f80ce2aSJacob Faibussowitsch                       J = global_index(i,  j+1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));
1995f80ce2aSJacob Faibussowitsch           if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
200c4762a1bSJed Brown         }
201c4762a1bSJed Brown       }
202c4762a1bSJed Brown       {
203c4762a1bSJed Brown         if (j>0) {
2045f80ce2aSJacob Faibussowitsch           if (i>0)   {J = global_index(i-1,j-1,k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
2055f80ce2aSJacob Faibussowitsch                       J = global_index(i,  j-1,k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));
2065f80ce2aSJacob Faibussowitsch           if (i<m-1) {J = global_index(i+1,j-1,k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
207c4762a1bSJed Brown         }
208c4762a1bSJed Brown         {
2095f80ce2aSJacob Faibussowitsch           if (i>0)   {J = global_index(i-1,j,  k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
2105f80ce2aSJacob Faibussowitsch                       J = global_index(i,  j,  k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));
2115f80ce2aSJacob Faibussowitsch           if (i<m-1) {J = global_index(i+1,j,  k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
212c4762a1bSJed Brown         }
213c4762a1bSJed Brown         if (j<n-1) {
2145f80ce2aSJacob Faibussowitsch           if (i>0)   {J = global_index(i-1,j+1,k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
2155f80ce2aSJacob Faibussowitsch                       J = global_index(i,  j+1,k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));
2165f80ce2aSJacob Faibussowitsch           if (i<m-1) {J = global_index(i+1,j+1,k  ,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
217c4762a1bSJed Brown         }
218c4762a1bSJed Brown       }
219c4762a1bSJed Brown       if (k<o-1) {
220c4762a1bSJed Brown         if (j>0) {
2215f80ce2aSJacob Faibussowitsch           if (i>0)   {J = global_index(i-1,j-1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
2225f80ce2aSJacob Faibussowitsch                       J = global_index(i,  j-1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));
2235f80ce2aSJacob Faibussowitsch           if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
224c4762a1bSJed Brown         }
225c4762a1bSJed Brown         {
2265f80ce2aSJacob Faibussowitsch           if (i>0)   {J = global_index(i-1,j,  k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
2275f80ce2aSJacob Faibussowitsch                       J = global_index(i,  j,  k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));
2285f80ce2aSJacob Faibussowitsch           if (i<m-1) {J = global_index(i+1,j,  k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
229c4762a1bSJed Brown         }
230c4762a1bSJed Brown         if (j<n-1) {
2315f80ce2aSJacob Faibussowitsch           if (i>0)   {J = global_index(i-1,j+1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
2325f80ce2aSJacob Faibussowitsch                       J = global_index(i,  j+1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));
2335f80ce2aSJacob Faibussowitsch           if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); CHKERRQ(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));}
234c4762a1bSJed Brown         }
235c4762a1bSJed Brown       }
2365f80ce2aSJacob Faibussowitsch       v = 26.0; CHKERRQ(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES));
237c4762a1bSJed Brown     }
238c4762a1bSJed Brown   }
2395f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
2405f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
241c4762a1bSJed Brown 
242c4762a1bSJed Brown   /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
2435f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDuplicate(A,MAT_COPY_VALUES,&B));
244c4762a1bSJed Brown 
2455f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage));
246c4762a1bSJed Brown 
247c4762a1bSJed Brown   /* Test C = A*B */
2485f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLogStagePush(fullMatMatMultStage));
2495f80ce2aSJacob Faibussowitsch   CHKERRQ(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
250c4762a1bSJed Brown 
251c4762a1bSJed Brown   /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C)  */
2525f80ce2aSJacob Faibussowitsch   CHKERRQ(MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP));
2535f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy));
2545f80ce2aSJacob Faibussowitsch   CHKERRQ(MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared));
255c4762a1bSJed Brown 
2565f80ce2aSJacob Faibussowitsch   CHKERRQ(MatView(C,PETSC_VIEWER_STDOUT_WORLD));
2575f80ce2aSJacob Faibussowitsch   CHKERRQ(MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD));
258c4762a1bSJed Brown 
2595f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&PtAP_squared));
2605f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&PtAP_copy));
2615f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&PtAP));
2625f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&C));
2635f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&B));
2645f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&A));
265*b122ec5aSJacob Faibussowitsch   CHKERRQ(PetscFinalize());
266*b122ec5aSJacob Faibussowitsch   return 0;
267c4762a1bSJed Brown }
268c4762a1bSJed Brown 
269c4762a1bSJed Brown /*TEST
270c4762a1bSJed Brown 
271c4762a1bSJed Brown  test:
272c4762a1bSJed Brown       suffix: 1
273c4762a1bSJed Brown       nsize: 1
274c20d7725SJed Brown       args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted
275c4762a1bSJed Brown 
276c4762a1bSJed Brown  test:
277c4762a1bSJed Brown        suffix: 2
278c4762a1bSJed Brown        nsize: 1
279c4762a1bSJed Brown        args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
280c4762a1bSJed Brown 
281c4762a1bSJed Brown  test:
282c4762a1bSJed Brown       suffix: 3
283c4762a1bSJed Brown       nsize: 4
284c4762a1bSJed Brown       args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
285c4762a1bSJed Brown 
286c4762a1bSJed Brown TEST*/
287