xref: /petsc/src/mat/tests/ex226.c (revision 2c71b3e237ead271e4f3aa1505f92bf476e3413d)
1c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
2c4762a1bSJed Brown 
3c4762a1bSJed Brown #include <petscmat.h>
4c4762a1bSJed Brown 
5c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
6c4762a1bSJed Brown int global_index(PetscInt i,PetscInt j,PetscInt k, PetscInt m, PetscInt n) { return i + j * m + k * m * n; }
7c4762a1bSJed Brown 
8c4762a1bSJed Brown int main(int argc,char **argv)
9c4762a1bSJed Brown {
10c4762a1bSJed Brown   Mat            A,B,C,PtAP,PtAP_copy,PtAP_squared;
11c4762a1bSJed Brown   PetscInt       i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1;
12c4762a1bSJed Brown   PetscScalar    v;
13c4762a1bSJed Brown   PetscErrorCode ierr;
14c4762a1bSJed Brown   PetscBool      equal=PETSC_FALSE,mat_view=PETSC_FALSE;
15c4762a1bSJed Brown   char           stencil[PETSC_MAX_PATH_LEN];
16c4762a1bSJed Brown #if defined(PETSC_USE_LOG)
17c4762a1bSJed Brown   PetscLogStage  fullMatMatMultStage;
18c4762a1bSJed Brown #endif
19c4762a1bSJed Brown 
20c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
21c4762a1bSJed Brown   ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr);
22c4762a1bSJed Brown   ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
23c4762a1bSJed Brown   ierr = PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL);CHKERRQ(ierr);
24c4762a1bSJed Brown   ierr = PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view);CHKERRQ(ierr);
25589a23caSBarry Smith   ierr = PetscOptionsGetString(NULL,NULL,"-stencil",stencil,sizeof(stencil),NULL);CHKERRQ(ierr);
26c4762a1bSJed Brown 
27c4762a1bSJed Brown   /* Create a aij matrix A */
28c4762a1bSJed Brown   M    = N = m*n*o;
29c4762a1bSJed Brown   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
30c4762a1bSJed Brown   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
31c4762a1bSJed Brown   ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
32c4762a1bSJed Brown   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
33c4762a1bSJed Brown 
34c4762a1bSJed Brown   /* Consistency checks */
35*2c71b3e2SJacob Faibussowitsch   PetscCheckFalse(o < 1 || m < 1 || n < 1,PETSC_COMM_WORLD,PETSC_ERR_USER,"Dimensions need to be larger than zero!");
36c4762a1bSJed Brown 
37c4762a1bSJed Brown   /************ 2D stencils ***************/
38c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "2d5point", &equal);CHKERRQ(ierr);
39c4762a1bSJed Brown   if (equal) {   /* 5-point stencil, 2D */
40c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr);
41c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
42c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
43c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
44c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
45c4762a1bSJed Brown       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
46c4762a1bSJed Brown       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
47c4762a1bSJed Brown       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
48c4762a1bSJed Brown       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
49c4762a1bSJed Brown       v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
50c4762a1bSJed Brown     }
51c4762a1bSJed Brown   }
52c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "2d9point", &equal);CHKERRQ(ierr);
53c4762a1bSJed Brown   if (equal) {      /* 9-point stencil, 2D */
54c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr);
55c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr);
56c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
57c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
58c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
59c4762a1bSJed Brown       if (i>0)            {J = global_index(i-1,j,  k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
60c4762a1bSJed Brown       if (i>0 && j>0)   {J = global_index(i-1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
61c4762a1bSJed Brown       if (j>0)            {J = global_index(i,  j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
62c4762a1bSJed Brown       if (i<m-1 && j>0)   {J = global_index(i+1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
63c4762a1bSJed Brown       if (i<m-1)          {J = global_index(i+1,j,  k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
64c4762a1bSJed Brown       if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
65c4762a1bSJed Brown       if (j<n-1)          {J = global_index(i,  j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
66c4762a1bSJed Brown       if (i>0 && j<n-1) {J = global_index(i-1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
67c4762a1bSJed Brown       v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
68c4762a1bSJed Brown     }
69c4762a1bSJed Brown   }
70c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "2d9point2", &equal);CHKERRQ(ierr);
71c4762a1bSJed Brown   if (equal) {      /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
72c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr);
73c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr);
74c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
75c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
76c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
77c4762a1bSJed Brown       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
78c4762a1bSJed Brown       if (i>1)   {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
79c4762a1bSJed Brown       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
80c4762a1bSJed Brown       if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
81c4762a1bSJed Brown       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
82c4762a1bSJed Brown       if (j>1)   {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
83c4762a1bSJed Brown       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
84c4762a1bSJed Brown       if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
85c4762a1bSJed Brown       v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
86c4762a1bSJed Brown     }
87c4762a1bSJed Brown   }
88c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "2d13point", &equal);CHKERRQ(ierr);
89c4762a1bSJed Brown   if (equal) {      /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
90c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr);
91c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr);
92c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
93c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
94c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
95c4762a1bSJed Brown       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
96c4762a1bSJed Brown       if (i>1)   {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
97c4762a1bSJed Brown       if (i>2)   {J = global_index(i-3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
98c4762a1bSJed Brown       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
99c4762a1bSJed Brown       if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
100c4762a1bSJed Brown       if (i<m-3) {J = global_index(i+3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
101c4762a1bSJed Brown       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
102c4762a1bSJed Brown       if (j>1)   {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
103c4762a1bSJed Brown       if (j>2)   {J = global_index(i,j-3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
104c4762a1bSJed Brown       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
105c4762a1bSJed Brown       if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
106c4762a1bSJed Brown       if (j<n-3) {J = global_index(i,j+3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
107c4762a1bSJed Brown       v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
108c4762a1bSJed Brown     }
109c4762a1bSJed Brown   }
110c4762a1bSJed Brown   /************ 3D stencils ***************/
111c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "3d7point", &equal);CHKERRQ(ierr);
112c4762a1bSJed Brown   if (equal) {      /* 7-point stencil, 3D */
113c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,7,NULL,7,NULL);CHKERRQ(ierr);
114c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,7,NULL);CHKERRQ(ierr);
115c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
116c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
117c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
118c4762a1bSJed Brown       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
119c4762a1bSJed Brown       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
120c4762a1bSJed Brown       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
121c4762a1bSJed Brown       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
122c4762a1bSJed Brown       if (k>0)   {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
123c4762a1bSJed Brown       if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
124c4762a1bSJed Brown       v = 6.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
125c4762a1bSJed Brown     }
126c4762a1bSJed Brown   }
127c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "3d13point", &equal);CHKERRQ(ierr);
128c4762a1bSJed Brown   if (equal) {      /* 13-point stencil, 3D */
129c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr);
130c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr);
131c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
132c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
133c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
134c4762a1bSJed Brown       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
135c4762a1bSJed Brown       if (i>1)   {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
136c4762a1bSJed Brown       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
137c4762a1bSJed Brown       if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
138c4762a1bSJed Brown       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
139c4762a1bSJed Brown       if (j>1)   {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
140c4762a1bSJed Brown       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
141c4762a1bSJed Brown       if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
142c4762a1bSJed Brown       if (k>0)   {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
143c4762a1bSJed Brown       if (k>1)   {J = global_index(i,j,k-2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
144c4762a1bSJed Brown       if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
145c4762a1bSJed Brown       if (k<o-2) {J = global_index(i,j,k+2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
146c4762a1bSJed Brown       v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
147c4762a1bSJed Brown     }
148c4762a1bSJed Brown   }
149c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "3d19point", &equal);CHKERRQ(ierr);
150c4762a1bSJed Brown   if (equal) {      /* 19-point stencil, 3D */
151c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,19,NULL,19,NULL);CHKERRQ(ierr);
152c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,19,NULL);CHKERRQ(ierr);
153c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
154c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
155c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
156c4762a1bSJed Brown       /* one hop */
157c4762a1bSJed Brown       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
158c4762a1bSJed Brown       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
159c4762a1bSJed Brown       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
160c4762a1bSJed Brown       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
161c4762a1bSJed Brown       if (k>0)   {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
162c4762a1bSJed Brown       if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
163c4762a1bSJed Brown       /* two hops */
164c4762a1bSJed Brown       if (i>0   && j>0)   {J = global_index(i-1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
165c4762a1bSJed Brown       if (i>0   && k>0)   {J = global_index(i-1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
166c4762a1bSJed Brown       if (i>0   && j<n-1) {J = global_index(i-1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
167c4762a1bSJed Brown       if (i>0   && k<o-1) {J = global_index(i-1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
168c4762a1bSJed Brown       if (i<m-1 && j>0)   {J = global_index(i+1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
169c4762a1bSJed Brown       if (i<m-1 && k>0)   {J = global_index(i+1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
170c4762a1bSJed Brown       if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
171c4762a1bSJed Brown       if (i<m-1 && k<o-1) {J = global_index(i+1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
172c4762a1bSJed Brown       if (j>0   && k>0)   {J = global_index(i,  j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
173c4762a1bSJed Brown       if (j>0   && k<o-1) {J = global_index(i,  j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
174c4762a1bSJed Brown       if (j<n-1 && k>0)   {J = global_index(i,  j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
175c4762a1bSJed Brown       if (j<n-1 && k<o-1) {J = global_index(i,  j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
176c4762a1bSJed Brown       v = 18.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
177c4762a1bSJed Brown     }
178c4762a1bSJed Brown   }
179c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "3d27point", &equal);CHKERRQ(ierr);
180c4762a1bSJed Brown   if (equal) {      /* 27-point stencil, 3D */
181c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,27,NULL,27,NULL);CHKERRQ(ierr);
182c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,27,NULL);CHKERRQ(ierr);
183c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
184c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
185c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
186c4762a1bSJed Brown       if (k>0) {
187c4762a1bSJed Brown         if (j>0) {
188c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
189c4762a1bSJed Brown                       J = global_index(i,  j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
190c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
191c4762a1bSJed Brown         }
192c4762a1bSJed Brown         {
193c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
194c4762a1bSJed Brown                       J = global_index(i,  j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
195c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
196c4762a1bSJed Brown         }
197c4762a1bSJed Brown         if (j<n-1) {
198c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
199c4762a1bSJed Brown                       J = global_index(i,  j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
200c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
201c4762a1bSJed Brown         }
202c4762a1bSJed Brown       }
203c4762a1bSJed Brown       {
204c4762a1bSJed Brown         if (j>0) {
205c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
206c4762a1bSJed Brown                       J = global_index(i,  j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
207c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
208c4762a1bSJed Brown         }
209c4762a1bSJed Brown         {
210c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j,  k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
211c4762a1bSJed Brown                       J = global_index(i,  j,  k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
212c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j,  k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
213c4762a1bSJed Brown         }
214c4762a1bSJed Brown         if (j<n-1) {
215c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
216c4762a1bSJed Brown                       J = global_index(i,  j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
217c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
218c4762a1bSJed Brown         }
219c4762a1bSJed Brown       }
220c4762a1bSJed Brown       if (k<o-1) {
221c4762a1bSJed Brown         if (j>0) {
222c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
223c4762a1bSJed Brown                       J = global_index(i,  j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
224c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
225c4762a1bSJed Brown         }
226c4762a1bSJed Brown         {
227c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
228c4762a1bSJed Brown                       J = global_index(i,  j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
229c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
230c4762a1bSJed Brown         }
231c4762a1bSJed Brown         if (j<n-1) {
232c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
233c4762a1bSJed Brown                       J = global_index(i,  j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
234c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
235c4762a1bSJed Brown         }
236c4762a1bSJed Brown       }
237c4762a1bSJed Brown       v = 26.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
238c4762a1bSJed Brown     }
239c4762a1bSJed Brown   }
240c4762a1bSJed Brown   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
241c4762a1bSJed Brown   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
242c4762a1bSJed Brown 
243c4762a1bSJed Brown   /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
244c4762a1bSJed Brown   ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
245c4762a1bSJed Brown 
246c4762a1bSJed Brown   ierr = PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage);CHKERRQ(ierr);
247c4762a1bSJed Brown 
248c4762a1bSJed Brown   /* Test C = A*B */
249c4762a1bSJed Brown   ierr = PetscLogStagePush(fullMatMatMultStage);CHKERRQ(ierr);
250c4762a1bSJed Brown   ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
251c4762a1bSJed Brown 
252c4762a1bSJed Brown   /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C)  */
253c4762a1bSJed Brown   ierr = MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP);CHKERRQ(ierr);
254c4762a1bSJed Brown   ierr = MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy);CHKERRQ(ierr);
255c4762a1bSJed Brown   ierr = MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared);CHKERRQ(ierr);
256c4762a1bSJed Brown 
257c4762a1bSJed Brown   ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
258c4762a1bSJed Brown   ierr = MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
259c4762a1bSJed Brown 
260c4762a1bSJed Brown   ierr = MatDestroy(&PtAP_squared);CHKERRQ(ierr);
261c4762a1bSJed Brown   ierr = MatDestroy(&PtAP_copy);CHKERRQ(ierr);
262c4762a1bSJed Brown   ierr = MatDestroy(&PtAP);CHKERRQ(ierr);
263c4762a1bSJed Brown   ierr = MatDestroy(&C);CHKERRQ(ierr);
264c4762a1bSJed Brown   ierr = MatDestroy(&B);CHKERRQ(ierr);
265c4762a1bSJed Brown   ierr = MatDestroy(&A);CHKERRQ(ierr);
266c4762a1bSJed Brown   ierr = PetscFinalize();
267c4762a1bSJed Brown   return ierr;
268c4762a1bSJed Brown }
269c4762a1bSJed Brown 
270c4762a1bSJed Brown /*TEST
271c4762a1bSJed Brown 
272c4762a1bSJed Brown  test:
273c4762a1bSJed Brown       suffix: 1
274c4762a1bSJed Brown       nsize: 1
275c20d7725SJed Brown       args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted
276c4762a1bSJed Brown 
277c4762a1bSJed Brown  test:
278c4762a1bSJed Brown        suffix: 2
279c4762a1bSJed Brown        nsize: 1
280c4762a1bSJed Brown        args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
281c4762a1bSJed Brown 
282c4762a1bSJed Brown  test:
283c4762a1bSJed Brown       suffix: 3
284c4762a1bSJed Brown       nsize: 4
285c4762a1bSJed Brown       args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
286c4762a1bSJed Brown 
287c4762a1bSJed Brown TEST*/
288