xref: /petsc/src/mat/tests/ex226.c (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1*c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
2*c4762a1bSJed Brown 
3*c4762a1bSJed Brown #include <petscmat.h>
4*c4762a1bSJed Brown 
5*c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
6*c4762a1bSJed Brown int global_index(PetscInt i,PetscInt j,PetscInt k, PetscInt m, PetscInt n) { return i + j * m + k * m * n; }
7*c4762a1bSJed Brown 
8*c4762a1bSJed Brown int main(int argc,char **argv)
9*c4762a1bSJed Brown {
10*c4762a1bSJed Brown   Mat            A,B,C,PtAP,PtAP_copy,PtAP_squared;
11*c4762a1bSJed Brown   PetscInt       i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1;
12*c4762a1bSJed Brown   PetscScalar    v;
13*c4762a1bSJed Brown   PetscErrorCode ierr;
14*c4762a1bSJed Brown   PetscBool      equal=PETSC_FALSE,mat_view=PETSC_FALSE;
15*c4762a1bSJed Brown   char           stencil[PETSC_MAX_PATH_LEN];
16*c4762a1bSJed Brown #if defined(PETSC_USE_LOG)
17*c4762a1bSJed Brown   PetscLogStage  fullMatMatMultStage;
18*c4762a1bSJed Brown #endif
19*c4762a1bSJed Brown 
20*c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
21*c4762a1bSJed Brown   ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr);
22*c4762a1bSJed Brown   ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
23*c4762a1bSJed Brown   ierr = PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL);CHKERRQ(ierr);
24*c4762a1bSJed Brown   ierr = PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view);CHKERRQ(ierr);
25*c4762a1bSJed Brown   ierr = PetscOptionsGetString(NULL,NULL,"-stencil",stencil,PETSC_MAX_PATH_LEN,NULL);CHKERRQ(ierr);
26*c4762a1bSJed Brown 
27*c4762a1bSJed Brown   /* Create a aij matrix A */
28*c4762a1bSJed Brown   M    = N = m*n*o;
29*c4762a1bSJed Brown   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
30*c4762a1bSJed Brown   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
31*c4762a1bSJed Brown   ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
32*c4762a1bSJed Brown   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
33*c4762a1bSJed Brown 
34*c4762a1bSJed Brown   /* Consistency checks */
35*c4762a1bSJed Brown   if (o < 1 || m < 1 || n < 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"Dimensions need to be larger than zero!");
36*c4762a1bSJed Brown 
37*c4762a1bSJed Brown   /************ 2D stencils ***************/
38*c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "2d5point", &equal);CHKERRQ(ierr);
39*c4762a1bSJed Brown   if (equal) {   /* 5-point stencil, 2D */
40*c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr);
41*c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
42*c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
43*c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
44*c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
45*c4762a1bSJed Brown       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
46*c4762a1bSJed Brown       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
47*c4762a1bSJed Brown       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
48*c4762a1bSJed Brown       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
49*c4762a1bSJed Brown       v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
50*c4762a1bSJed Brown     }
51*c4762a1bSJed Brown   }
52*c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "2d9point", &equal);CHKERRQ(ierr);
53*c4762a1bSJed Brown   if (equal) {      /* 9-point stencil, 2D */
54*c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr);
55*c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr);
56*c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
57*c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
58*c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
59*c4762a1bSJed Brown       if (i>0)            {J = global_index(i-1,j,  k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
60*c4762a1bSJed Brown       if (i>0   && j>0)   {J = global_index(i-1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
61*c4762a1bSJed Brown       if (         j>0)   {J = global_index(i,  j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
62*c4762a1bSJed Brown       if (i<m-1 && j>0)   {J = global_index(i+1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
63*c4762a1bSJed Brown       if (i<m-1)          {J = global_index(i+1,j,  k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
64*c4762a1bSJed Brown       if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
65*c4762a1bSJed Brown       if (j<n-1)          {J = global_index(i,  j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
66*c4762a1bSJed Brown       if (i>0   && j<n-1) {J = global_index(i-1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
67*c4762a1bSJed Brown       v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
68*c4762a1bSJed Brown     }
69*c4762a1bSJed Brown   }
70*c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "2d9point2", &equal);CHKERRQ(ierr);
71*c4762a1bSJed Brown   if (equal) {      /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
72*c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr);
73*c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr);
74*c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
75*c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
76*c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
77*c4762a1bSJed Brown       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
78*c4762a1bSJed Brown       if (i>1)   {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
79*c4762a1bSJed Brown       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
80*c4762a1bSJed Brown       if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
81*c4762a1bSJed Brown       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
82*c4762a1bSJed Brown       if (j>1)   {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
83*c4762a1bSJed Brown       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
84*c4762a1bSJed Brown       if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
85*c4762a1bSJed Brown       v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
86*c4762a1bSJed Brown     }
87*c4762a1bSJed Brown   }
88*c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "2d13point", &equal);CHKERRQ(ierr);
89*c4762a1bSJed Brown   if (equal) {      /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
90*c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr);
91*c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr);
92*c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
93*c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
94*c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
95*c4762a1bSJed Brown       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
96*c4762a1bSJed Brown       if (i>1)   {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
97*c4762a1bSJed Brown       if (i>2)   {J = global_index(i-3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
98*c4762a1bSJed Brown       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
99*c4762a1bSJed Brown       if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
100*c4762a1bSJed Brown       if (i<m-3) {J = global_index(i+3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
101*c4762a1bSJed Brown       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
102*c4762a1bSJed Brown       if (j>1)   {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
103*c4762a1bSJed Brown       if (j>2)   {J = global_index(i,j-3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
104*c4762a1bSJed Brown       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
105*c4762a1bSJed Brown       if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
106*c4762a1bSJed Brown       if (j<n-3) {J = global_index(i,j+3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
107*c4762a1bSJed Brown       v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
108*c4762a1bSJed Brown     }
109*c4762a1bSJed Brown   }
110*c4762a1bSJed Brown   /************ 3D stencils ***************/
111*c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "3d7point", &equal);CHKERRQ(ierr);
112*c4762a1bSJed Brown   if (equal) {      /* 7-point stencil, 3D */
113*c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,7,NULL,7,NULL);CHKERRQ(ierr);
114*c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,7,NULL);CHKERRQ(ierr);
115*c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
116*c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
117*c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
118*c4762a1bSJed Brown       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
119*c4762a1bSJed Brown       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
120*c4762a1bSJed Brown       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
121*c4762a1bSJed Brown       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
122*c4762a1bSJed Brown       if (k>0)   {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
123*c4762a1bSJed Brown       if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
124*c4762a1bSJed Brown       v = 6.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
125*c4762a1bSJed Brown     }
126*c4762a1bSJed Brown   }
127*c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "3d13point", &equal);CHKERRQ(ierr);
128*c4762a1bSJed Brown   if (equal) {      /* 13-point stencil, 3D */
129*c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr);
130*c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr);
131*c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
132*c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
133*c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
134*c4762a1bSJed Brown       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
135*c4762a1bSJed Brown       if (i>1)   {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
136*c4762a1bSJed Brown       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
137*c4762a1bSJed Brown       if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
138*c4762a1bSJed Brown       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
139*c4762a1bSJed Brown       if (j>1)   {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
140*c4762a1bSJed Brown       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
141*c4762a1bSJed Brown       if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
142*c4762a1bSJed Brown       if (k>0)   {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
143*c4762a1bSJed Brown       if (k>1)   {J = global_index(i,j,k-2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
144*c4762a1bSJed Brown       if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
145*c4762a1bSJed Brown       if (k<o-2) {J = global_index(i,j,k+2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
146*c4762a1bSJed Brown       v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
147*c4762a1bSJed Brown     }
148*c4762a1bSJed Brown   }
149*c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "3d19point", &equal);CHKERRQ(ierr);
150*c4762a1bSJed Brown   if (equal) {      /* 19-point stencil, 3D */
151*c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,19,NULL,19,NULL);CHKERRQ(ierr);
152*c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,19,NULL);CHKERRQ(ierr);
153*c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
154*c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
155*c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
156*c4762a1bSJed Brown       /* one hop */
157*c4762a1bSJed Brown       if (i>0)   {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
158*c4762a1bSJed Brown       if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
159*c4762a1bSJed Brown       if (j>0)   {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
160*c4762a1bSJed Brown       if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
161*c4762a1bSJed Brown       if (k>0)   {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
162*c4762a1bSJed Brown       if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
163*c4762a1bSJed Brown       /* two hops */
164*c4762a1bSJed Brown       if (i>0   && j>0)   {J = global_index(i-1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
165*c4762a1bSJed Brown       if (i>0   && k>0)   {J = global_index(i-1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
166*c4762a1bSJed Brown       if (i>0   && j<n-1) {J = global_index(i-1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
167*c4762a1bSJed Brown       if (i>0   && k<o-1) {J = global_index(i-1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
168*c4762a1bSJed Brown       if (i<m-1 && j>0)   {J = global_index(i+1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
169*c4762a1bSJed Brown       if (i<m-1 && k>0)   {J = global_index(i+1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
170*c4762a1bSJed Brown       if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
171*c4762a1bSJed Brown       if (i<m-1 && k<o-1) {J = global_index(i+1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
172*c4762a1bSJed Brown       if (j>0   && k>0)   {J = global_index(i,  j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
173*c4762a1bSJed Brown       if (j>0   && k<o-1) {J = global_index(i,  j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
174*c4762a1bSJed Brown       if (j<n-1 && k>0)   {J = global_index(i,  j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
175*c4762a1bSJed Brown       if (j<n-1 && k<o-1) {J = global_index(i,  j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
176*c4762a1bSJed Brown       v = 18.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
177*c4762a1bSJed Brown     }
178*c4762a1bSJed Brown   }
179*c4762a1bSJed Brown   ierr = PetscStrcmp(stencil, "3d27point", &equal);CHKERRQ(ierr);
180*c4762a1bSJed Brown   if (equal) {      /* 27-point stencil, 3D */
181*c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(A,27,NULL,27,NULL);CHKERRQ(ierr);
182*c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(A,27,NULL);CHKERRQ(ierr);
183*c4762a1bSJed Brown     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
184*c4762a1bSJed Brown     for (Ii=Istart; Ii<Iend; Ii++) {
185*c4762a1bSJed Brown       v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m);
186*c4762a1bSJed Brown       if (k>0) {
187*c4762a1bSJed Brown         if (j>0) {
188*c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
189*c4762a1bSJed Brown                       J = global_index(i,  j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
190*c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
191*c4762a1bSJed Brown         }
192*c4762a1bSJed Brown         {
193*c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
194*c4762a1bSJed Brown                       J = global_index(i,  j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
195*c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j,  k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
196*c4762a1bSJed Brown         }
197*c4762a1bSJed Brown         if (j<n-1) {
198*c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
199*c4762a1bSJed Brown                       J = global_index(i,  j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
200*c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
201*c4762a1bSJed Brown         }
202*c4762a1bSJed Brown       }
203*c4762a1bSJed Brown       {
204*c4762a1bSJed Brown         if (j>0) {
205*c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
206*c4762a1bSJed Brown                       J = global_index(i,  j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
207*c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j-1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
208*c4762a1bSJed Brown         }
209*c4762a1bSJed Brown         {
210*c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j,  k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
211*c4762a1bSJed Brown                       J = global_index(i,  j,  k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
212*c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j,  k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
213*c4762a1bSJed Brown         }
214*c4762a1bSJed Brown         if (j<n-1) {
215*c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
216*c4762a1bSJed Brown                       J = global_index(i,  j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
217*c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j+1,k  ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
218*c4762a1bSJed Brown         }
219*c4762a1bSJed Brown       }
220*c4762a1bSJed Brown       if (k<o-1) {
221*c4762a1bSJed Brown         if (j>0) {
222*c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
223*c4762a1bSJed Brown                       J = global_index(i,  j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
224*c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
225*c4762a1bSJed Brown         }
226*c4762a1bSJed Brown         {
227*c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
228*c4762a1bSJed Brown                       J = global_index(i,  j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
229*c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j,  k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
230*c4762a1bSJed Brown         }
231*c4762a1bSJed Brown         if (j<n-1) {
232*c4762a1bSJed Brown           if (i>0)   {J = global_index(i-1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
233*c4762a1bSJed Brown                       J = global_index(i,  j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
234*c4762a1bSJed Brown           if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
235*c4762a1bSJed Brown         }
236*c4762a1bSJed Brown       }
237*c4762a1bSJed Brown       v = 26.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
238*c4762a1bSJed Brown     }
239*c4762a1bSJed Brown   }
240*c4762a1bSJed Brown   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
241*c4762a1bSJed Brown   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
242*c4762a1bSJed Brown 
243*c4762a1bSJed Brown   /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
244*c4762a1bSJed Brown   ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
245*c4762a1bSJed Brown 
246*c4762a1bSJed Brown   ierr = PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage);CHKERRQ(ierr);
247*c4762a1bSJed Brown 
248*c4762a1bSJed Brown   /* Test C = A*B */
249*c4762a1bSJed Brown   ierr = PetscLogStagePush(fullMatMatMultStage);CHKERRQ(ierr);
250*c4762a1bSJed Brown   ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
251*c4762a1bSJed Brown 
252*c4762a1bSJed Brown   /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C)  */
253*c4762a1bSJed Brown   ierr = MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP);CHKERRQ(ierr);
254*c4762a1bSJed Brown   ierr = MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy);CHKERRQ(ierr);
255*c4762a1bSJed Brown   ierr = MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared);CHKERRQ(ierr);
256*c4762a1bSJed Brown 
257*c4762a1bSJed Brown   ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
258*c4762a1bSJed Brown   ierr = MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
259*c4762a1bSJed Brown 
260*c4762a1bSJed Brown 
261*c4762a1bSJed Brown   ierr = MatDestroy(&PtAP_squared);CHKERRQ(ierr);
262*c4762a1bSJed Brown   ierr = MatDestroy(&PtAP_copy);CHKERRQ(ierr);
263*c4762a1bSJed Brown   ierr = MatDestroy(&PtAP);CHKERRQ(ierr);
264*c4762a1bSJed Brown   ierr = MatDestroy(&C);CHKERRQ(ierr);
265*c4762a1bSJed Brown   ierr = MatDestroy(&B);CHKERRQ(ierr);
266*c4762a1bSJed Brown   ierr = MatDestroy(&A);CHKERRQ(ierr);
267*c4762a1bSJed Brown   ierr = PetscFinalize();
268*c4762a1bSJed Brown   return ierr;
269*c4762a1bSJed Brown }
270*c4762a1bSJed Brown 
271*c4762a1bSJed Brown 
272*c4762a1bSJed Brown /*TEST
273*c4762a1bSJed Brown 
274*c4762a1bSJed Brown  test:
275*c4762a1bSJed Brown       suffix: 1
276*c4762a1bSJed Brown       nsize: 1
277*c4762a1bSJed Brown       args: -m 8 -n 8 -stencil 2d5point -matmatmult_via combined
278*c4762a1bSJed Brown 
279*c4762a1bSJed Brown  test:
280*c4762a1bSJed Brown        suffix: 2
281*c4762a1bSJed Brown        nsize: 1
282*c4762a1bSJed Brown        args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
283*c4762a1bSJed Brown 
284*c4762a1bSJed Brown  test:
285*c4762a1bSJed Brown       suffix: 3
286*c4762a1bSJed Brown       nsize: 4
287*c4762a1bSJed Brown       args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
288*c4762a1bSJed Brown 
289*c4762a1bSJed Brown 
290*c4762a1bSJed Brown 
291*c4762a1bSJed Brown TEST*/
292