xref: /petsc/src/mat/tests/ex237.c (revision 56bd94f40900b7e2c0395b85dbeae1d9a0881c6c)
1*56bd94f4SPierre Jolivet static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n";
2*56bd94f4SPierre Jolivet 
3*56bd94f4SPierre Jolivet /*
4*56bd94f4SPierre Jolivet   See the paper below for more information
5*56bd94f4SPierre Jolivet 
6*56bd94f4SPierre Jolivet    "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods",
7*56bd94f4SPierre Jolivet    P. Jolivet, J. E. Roman, and S. Zampini (2020).
8*56bd94f4SPierre Jolivet */
9*56bd94f4SPierre Jolivet 
10*56bd94f4SPierre Jolivet #include <petsc.h>
11*56bd94f4SPierre Jolivet 
12*56bd94f4SPierre Jolivet #if defined(PETSC_HAVE_MKL)
13*56bd94f4SPierre Jolivet #include <mkl.h>
14*56bd94f4SPierre Jolivet #define PetscStackCallMKLSparse(func, args) do {               \
15*56bd94f4SPierre Jolivet     sparse_status_t __ierr;                                    \
16*56bd94f4SPierre Jolivet     PetscStackPush(#func);                                     \
17*56bd94f4SPierre Jolivet     __ierr = func args;                                        \
18*56bd94f4SPierre Jolivet     PetscStackPop;                                             \
19*56bd94f4SPierre Jolivet     if (__ierr != SPARSE_STATUS_SUCCESS) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \
20*56bd94f4SPierre Jolivet   } while (0)
21*56bd94f4SPierre Jolivet #else
22*56bd94f4SPierre Jolivet #define PetscStackCallMKLSparse(func, args) do {               \
23*56bd94f4SPierre Jolivet     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \
24*56bd94f4SPierre Jolivet   } while (0)
25*56bd94f4SPierre Jolivet #endif
26*56bd94f4SPierre Jolivet 
27*56bd94f4SPierre Jolivet int main(int argc, char** argv) {
28*56bd94f4SPierre Jolivet   Mat            A, C, D, E;
29*56bd94f4SPierre Jolivet   PetscInt       nbs = 10, ntype = 10, nN = 8, m, M, trial = 5;
30*56bd94f4SPierre Jolivet   PetscViewer    viewer;
31*56bd94f4SPierre Jolivet   PetscInt       bs[10], N[8];
32*56bd94f4SPierre Jolivet   char           *type[10];
33*56bd94f4SPierre Jolivet   PetscMPIInt    size;
34*56bd94f4SPierre Jolivet   PetscBool      flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl;
35*56bd94f4SPierre Jolivet   char           file[PETSC_MAX_PATH_LEN];
36*56bd94f4SPierre Jolivet   PetscErrorCode ierr;
37*56bd94f4SPierre Jolivet 
38*56bd94f4SPierre Jolivet   ierr = PetscInitialize(&argc, &argv, NULL, help);CHKERRQ(ierr);
39*56bd94f4SPierre Jolivet   if (ierr) return ierr;
40*56bd94f4SPierre Jolivet   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
41*56bd94f4SPierre Jolivet   if (size != 1) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only");
42*56bd94f4SPierre Jolivet   ierr = PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg);CHKERRQ(ierr);
43*56bd94f4SPierre Jolivet   if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option");
44*56bd94f4SPierre Jolivet   ierr = PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL);CHKERRQ(ierr);
45*56bd94f4SPierre Jolivet   ierr = PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg);CHKERRQ(ierr);
46*56bd94f4SPierre Jolivet   if (!flg) {
47*56bd94f4SPierre Jolivet     nbs = 1;
48*56bd94f4SPierre Jolivet     bs[0] = 1;
49*56bd94f4SPierre Jolivet   }
50*56bd94f4SPierre Jolivet   ierr = PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg);CHKERRQ(ierr);
51*56bd94f4SPierre Jolivet   if (!flg) {
52*56bd94f4SPierre Jolivet     nN = 8;
53*56bd94f4SPierre Jolivet     N[0] = 1;  N[1] = 2;  N[2] = 4;  N[3] = 8;
54*56bd94f4SPierre Jolivet     N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128;
55*56bd94f4SPierre Jolivet   }
56*56bd94f4SPierre Jolivet   ierr = PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg);CHKERRQ(ierr);
57*56bd94f4SPierre Jolivet   if (!flg) {
58*56bd94f4SPierre Jolivet     ntype = 1;
59*56bd94f4SPierre Jolivet     ierr = PetscStrallocpy(MATSEQAIJ, &type[0]);CHKERRQ(ierr);
60*56bd94f4SPierre Jolivet   }
61*56bd94f4SPierre Jolivet   ierr = PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL);CHKERRQ(ierr);
62*56bd94f4SPierre Jolivet   ierr = PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL);CHKERRQ(ierr);
63*56bd94f4SPierre Jolivet   ierr = PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL);CHKERRQ(ierr);
64*56bd94f4SPierre Jolivet   for (PetscInt j = 0; j < nbs; ++j) {
65*56bd94f4SPierre Jolivet     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer);CHKERRQ(ierr);
66*56bd94f4SPierre Jolivet     ierr = MatCreate(PETSC_COMM_WORLD, &A);CHKERRQ(ierr);
67*56bd94f4SPierre Jolivet     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
68*56bd94f4SPierre Jolivet     ierr = MatLoad(A, viewer);CHKERRQ(ierr);
69*56bd94f4SPierre Jolivet     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
70*56bd94f4SPierre Jolivet     ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");CHKERRQ(ierr);
71*56bd94f4SPierre Jolivet     if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix");
72*56bd94f4SPierre Jolivet     ierr = MatGetSize(A, &m, &M);CHKERRQ(ierr);
73*56bd94f4SPierre Jolivet     if (m == M) {
74*56bd94f4SPierre Jolivet       Mat oA;
75*56bd94f4SPierre Jolivet       ierr = MatTranspose(A, MAT_INITIAL_MATRIX, &oA);CHKERRQ(ierr);
76*56bd94f4SPierre Jolivet       ierr = MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
77*56bd94f4SPierre Jolivet       ierr = MatDestroy(&oA);CHKERRQ(ierr);
78*56bd94f4SPierre Jolivet     }
79*56bd94f4SPierre Jolivet     ierr = MatGetLocalSize(A, &m, NULL);CHKERRQ(ierr);
80*56bd94f4SPierre Jolivet     ierr = MatGetSize(A, &M, NULL);CHKERRQ(ierr);
81*56bd94f4SPierre Jolivet     if (bs[j] > 1) {
82*56bd94f4SPierre Jolivet       Mat               T, Tt, B;
83*56bd94f4SPierre Jolivet       const PetscScalar *ptr;
84*56bd94f4SPierre Jolivet       PetscScalar       *val, *Aa;
85*56bd94f4SPierre Jolivet       const PetscInt    *Ai, *Aj;
86*56bd94f4SPierre Jolivet       PetscInt          An, i, k;
87*56bd94f4SPierre Jolivet       PetscBool         done;
88*56bd94f4SPierre Jolivet 
89*56bd94f4SPierre Jolivet       ierr = MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T);CHKERRQ(ierr);
90*56bd94f4SPierre Jolivet       ierr = MatSetRandom(T, NULL);CHKERRQ(ierr);
91*56bd94f4SPierre Jolivet       ierr = MatTranspose(T, MAT_INITIAL_MATRIX, &Tt);CHKERRQ(ierr);
92*56bd94f4SPierre Jolivet       ierr = MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN);CHKERRQ(ierr);
93*56bd94f4SPierre Jolivet       ierr = MatDestroy(&Tt);CHKERRQ(ierr);
94*56bd94f4SPierre Jolivet       ierr = MatDenseGetArrayRead(T, &ptr);CHKERRQ(ierr);
95*56bd94f4SPierre Jolivet       ierr = MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);CHKERRQ(ierr);
96*56bd94f4SPierre Jolivet       if (!done || An != m) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
97*56bd94f4SPierre Jolivet       ierr = MatSeqAIJGetArray(A, &Aa);CHKERRQ(ierr);
98*56bd94f4SPierre Jolivet       ierr = MatCreate(PETSC_COMM_WORLD, &B);CHKERRQ(ierr);
99*56bd94f4SPierre Jolivet       ierr = MatSetType(B, MATSEQBAIJ);CHKERRQ(ierr);
100*56bd94f4SPierre Jolivet       ierr = MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE);CHKERRQ(ierr);
101*56bd94f4SPierre Jolivet       ierr = PetscMalloc1(Ai[An] * bs[j] * bs[j],&val);CHKERRQ(ierr);
102*56bd94f4SPierre Jolivet       for(i = 0; i < Ai[An]; ++i)
103*56bd94f4SPierre Jolivet         for(k = 0; k < bs[j] * bs[j]; ++k)
104*56bd94f4SPierre Jolivet           val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k];
105*56bd94f4SPierre Jolivet       ierr = MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE);CHKERRQ(ierr);
106*56bd94f4SPierre Jolivet       ierr = MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val);CHKERRQ(ierr);
107*56bd94f4SPierre Jolivet       ierr = PetscFree(val);CHKERRQ(ierr);
108*56bd94f4SPierre Jolivet       ierr = MatSeqAIJRestoreArray(A, &Aa);CHKERRQ(ierr);
109*56bd94f4SPierre Jolivet       ierr = MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);CHKERRQ(ierr);
110*56bd94f4SPierre Jolivet       ierr = MatDenseRestoreArrayRead(T, &ptr);CHKERRQ(ierr);
111*56bd94f4SPierre Jolivet       ierr = MatDestroy(&T);CHKERRQ(ierr);
112*56bd94f4SPierre Jolivet       ierr = MatDestroy(&A);CHKERRQ(ierr);
113*56bd94f4SPierre Jolivet       A    = B;
114*56bd94f4SPierre Jolivet     }
115*56bd94f4SPierre Jolivet     /* reconvert back to SeqAIJ before converting to the desired type later */
116*56bd94f4SPierre Jolivet     if (!convert) E = A;
117*56bd94f4SPierre Jolivet     ierr = MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E);CHKERRQ(ierr);
118*56bd94f4SPierre Jolivet     ierr = MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE);CHKERRQ(ierr);
119*56bd94f4SPierre Jolivet     for (PetscInt i = 0; i < ntype; ++i) {
120*56bd94f4SPierre Jolivet       char        *tmp;
121*56bd94f4SPierre Jolivet       PetscInt    *ia_ptr, *ja_ptr, k;
122*56bd94f4SPierre Jolivet       PetscScalar *a_ptr;
123*56bd94f4SPierre Jolivet #if defined(PETSC_HAVE_MKL)
124*56bd94f4SPierre Jolivet       struct matrix_descr descr;
125*56bd94f4SPierre Jolivet       sparse_matrix_t     spr;
126*56bd94f4SPierre Jolivet       descr.type = SPARSE_MATRIX_TYPE_GENERAL;
127*56bd94f4SPierre Jolivet       descr.diag = SPARSE_DIAG_NON_UNIT;
128*56bd94f4SPierre Jolivet #endif
129*56bd94f4SPierre Jolivet       if (convert) {
130*56bd94f4SPierre Jolivet         ierr = MatDestroy(&A);CHKERRQ(ierr);
131*56bd94f4SPierre Jolivet       }
132*56bd94f4SPierre Jolivet       ierr = PetscStrstr(type[i], "mkl", &tmp);CHKERRQ(ierr);
133*56bd94f4SPierre Jolivet       if (tmp) {
134*56bd94f4SPierre Jolivet         size_t mlen, tlen;
135*56bd94f4SPierre Jolivet         char base[256];
136*56bd94f4SPierre Jolivet 
137*56bd94f4SPierre Jolivet         mkl  = PETSC_TRUE;
138*56bd94f4SPierre Jolivet         ierr = PetscStrlen(tmp, &mlen);CHKERRQ(ierr);
139*56bd94f4SPierre Jolivet         ierr = PetscStrlen(type[i], &tlen);CHKERRQ(ierr);
140*56bd94f4SPierre Jolivet         ierr = PetscStrncpy(base, type[i], tlen-mlen + 1);CHKERRQ(ierr);
141*56bd94f4SPierre Jolivet         ierr = MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr);
142*56bd94f4SPierre Jolivet       } else {
143*56bd94f4SPierre Jolivet         mkl  = PETSC_FALSE;
144*56bd94f4SPierre Jolivet         ierr = PetscStrstr(type[i], "maij", &tmp);CHKERRQ(ierr);
145*56bd94f4SPierre Jolivet         if (!tmp) {
146*56bd94f4SPierre Jolivet           ierr = MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr);
147*56bd94f4SPierre Jolivet         } else {
148*56bd94f4SPierre Jolivet           ierr = MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr);
149*56bd94f4SPierre Jolivet           maij = PETSC_TRUE;
150*56bd94f4SPierre Jolivet         }
151*56bd94f4SPierre Jolivet       }
152*56bd94f4SPierre Jolivet       ierr = PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "");CHKERRQ(ierr);
153*56bd94f4SPierre Jolivet       if (mkl) {
154*56bd94f4SPierre Jolivet         const PetscInt *Ai, *Aj;
155*56bd94f4SPierre Jolivet         PetscInt       An;
156*56bd94f4SPierre Jolivet         PetscBool      done;
157*56bd94f4SPierre Jolivet 
158*56bd94f4SPierre Jolivet         ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "");CHKERRQ(ierr);
159*56bd94f4SPierre Jolivet         if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented");
160*56bd94f4SPierre Jolivet         ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);CHKERRQ(ierr);
161*56bd94f4SPierre Jolivet         ierr = MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);CHKERRQ(ierr);
162*56bd94f4SPierre Jolivet         if (!done) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
163*56bd94f4SPierre Jolivet         ierr = PetscMalloc1(An + 1,&ia_ptr);CHKERRQ(ierr);
164*56bd94f4SPierre Jolivet         ierr = PetscMalloc1(Ai[An],&ja_ptr);CHKERRQ(ierr);
165*56bd94f4SPierre Jolivet         if (flg) { /* SeqAIJ */
166*56bd94f4SPierre Jolivet           for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k];
167*56bd94f4SPierre Jolivet           for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k];
168*56bd94f4SPierre Jolivet           ierr = MatSeqAIJGetArray(A, &a_ptr);CHKERRQ(ierr);
169*56bd94f4SPierre Jolivet           PetscStackCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
170*56bd94f4SPierre Jolivet         } else {
171*56bd94f4SPierre Jolivet           ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg);CHKERRQ(ierr);
172*56bd94f4SPierre Jolivet           if (flg) {
173*56bd94f4SPierre Jolivet             for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
174*56bd94f4SPierre Jolivet             for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
175*56bd94f4SPierre Jolivet             ierr = MatSeqBAIJGetArray(A, &a_ptr);CHKERRQ(ierr);
176*56bd94f4SPierre Jolivet             PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
177*56bd94f4SPierre Jolivet           } else {
178*56bd94f4SPierre Jolivet             ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg);CHKERRQ(ierr);
179*56bd94f4SPierre Jolivet             if (flg) {
180*56bd94f4SPierre Jolivet               for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
181*56bd94f4SPierre Jolivet               for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
182*56bd94f4SPierre Jolivet               ierr = MatSeqSBAIJGetArray(A, &a_ptr);CHKERRQ(ierr);
183*56bd94f4SPierre Jolivet               PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
184*56bd94f4SPierre Jolivet #if defined(PETSC_HAVE_MKL)
185*56bd94f4SPierre Jolivet               descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
186*56bd94f4SPierre Jolivet               descr.mode = SPARSE_FILL_MODE_UPPER;
187*56bd94f4SPierre Jolivet               descr.diag = SPARSE_DIAG_NON_UNIT;
188*56bd94f4SPierre Jolivet #endif
189*56bd94f4SPierre Jolivet             }
190*56bd94f4SPierre Jolivet           }
191*56bd94f4SPierre Jolivet         }
192*56bd94f4SPierre Jolivet         ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);CHKERRQ(ierr);
193*56bd94f4SPierre Jolivet         ierr = MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);CHKERRQ(ierr);
194*56bd94f4SPierre Jolivet       }
195*56bd94f4SPierre Jolivet 
196*56bd94f4SPierre Jolivet       ierr = MatViewFromOptions(A, NULL, "-A_view");CHKERRQ(ierr);
197*56bd94f4SPierre Jolivet 
198*56bd94f4SPierre Jolivet       for(k = 0; k < nN; ++k) {
199*56bd94f4SPierre Jolivet         MatType       Atype, Ctype;
200*56bd94f4SPierre Jolivet         PetscInt      AM, AN, CM, CN, t;
201*56bd94f4SPierre Jolivet         PetscLogStage stage, tstage;
202*56bd94f4SPierre Jolivet         char          stage_s[256];
203*56bd94f4SPierre Jolivet 
204*56bd94f4SPierre Jolivet         ierr = MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C);CHKERRQ(ierr);
205*56bd94f4SPierre Jolivet         ierr = MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D);CHKERRQ(ierr);
206*56bd94f4SPierre Jolivet         ierr = MatSetRandom(C, NULL);CHKERRQ(ierr);
207*56bd94f4SPierre Jolivet         if (cuda) { /* convert to GPU if needed */
208*56bd94f4SPierre Jolivet           ierr = MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C);CHKERRQ(ierr);
209*56bd94f4SPierre Jolivet           ierr = MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D);CHKERRQ(ierr);
210*56bd94f4SPierre Jolivet         }
211*56bd94f4SPierre Jolivet         if (mkl) {
212*56bd94f4SPierre Jolivet           if (N[k] > 1) PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial));
213*56bd94f4SPierre Jolivet           else          PetscStackCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial));
214*56bd94f4SPierre Jolivet           PetscStackCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE));
215*56bd94f4SPierre Jolivet           PetscStackCallMKLSparse(mkl_sparse_optimize, (spr));
216*56bd94f4SPierre Jolivet         }
217*56bd94f4SPierre Jolivet         ierr = MatGetType(A, &Atype);CHKERRQ(ierr);
218*56bd94f4SPierre Jolivet         ierr = MatGetType(C, &Ctype);CHKERRQ(ierr);
219*56bd94f4SPierre Jolivet         ierr = MatGetSize(A, &AM, &AN);CHKERRQ(ierr);
220*56bd94f4SPierre Jolivet         ierr = MatGetSize(C, &CM, &CN);CHKERRQ(ierr);
221*56bd94f4SPierre Jolivet 
222*56bd94f4SPierre Jolivet         if (!maij || N[k] > 1) {
223*56bd94f4SPierre Jolivet           ierr = PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);CHKERRQ(ierr);
224*56bd94f4SPierre Jolivet           ierr = PetscLogStageRegister(stage_s, &stage);CHKERRQ(ierr);
225*56bd94f4SPierre Jolivet         }
226*56bd94f4SPierre Jolivet         if (trans && N[k] > 1) {
227*56bd94f4SPierre Jolivet           ierr = PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);CHKERRQ(ierr);
228*56bd94f4SPierre Jolivet           ierr = PetscLogStageRegister(stage_s, &tstage);CHKERRQ(ierr);
229*56bd94f4SPierre Jolivet         }
230*56bd94f4SPierre Jolivet 
231*56bd94f4SPierre Jolivet         /* A*B */
232*56bd94f4SPierre Jolivet         if (N[k] > 1) {
233*56bd94f4SPierre Jolivet           if (!maij) {
234*56bd94f4SPierre Jolivet             ierr = MatProductCreateWithMat(A, C, NULL, D);CHKERRQ(ierr);
235*56bd94f4SPierre Jolivet             ierr = MatProductSetType(D, MATPRODUCT_AB);CHKERRQ(ierr);
236*56bd94f4SPierre Jolivet             ierr = MatProductSetFromOptions(D);CHKERRQ(ierr);
237*56bd94f4SPierre Jolivet             ierr = MatProductSymbolic(D);CHKERRQ(ierr);
238*56bd94f4SPierre Jolivet           }
239*56bd94f4SPierre Jolivet 
240*56bd94f4SPierre Jolivet           if (!mkl) {
241*56bd94f4SPierre Jolivet             if (!maij) {
242*56bd94f4SPierre Jolivet               ierr = MatProductNumeric(D);CHKERRQ(ierr);
243*56bd94f4SPierre Jolivet               ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and B %s %Dx%D\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN);
244*56bd94f4SPierre Jolivet               ierr = PetscLogStagePush(stage);CHKERRQ(ierr);
245*56bd94f4SPierre Jolivet               for (t = 0; t < trial; ++t) {
246*56bd94f4SPierre Jolivet                 ierr = MatProductNumeric(D);CHKERRQ(ierr);
247*56bd94f4SPierre Jolivet               }
248*56bd94f4SPierre Jolivet               ierr = PetscLogStagePop();CHKERRQ(ierr);
249*56bd94f4SPierre Jolivet             } else {
250*56bd94f4SPierre Jolivet               Mat               E, Ct, Dt;
251*56bd94f4SPierre Jolivet               Vec               cC, cD;
252*56bd94f4SPierre Jolivet               const PetscScalar *c_ptr;
253*56bd94f4SPierre Jolivet               PetscScalar       *d_ptr;
254*56bd94f4SPierre Jolivet               ierr = MatCreateMAIJ(A, N[k], &E);CHKERRQ(ierr);
255*56bd94f4SPierre Jolivet               ierr = MatDenseGetLocalMatrix(C, &Ct);CHKERRQ(ierr);
256*56bd94f4SPierre Jolivet               ierr = MatDenseGetLocalMatrix(D, &Dt);CHKERRQ(ierr);
257*56bd94f4SPierre Jolivet               ierr = MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);CHKERRQ(ierr);
258*56bd94f4SPierre Jolivet               ierr = MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);CHKERRQ(ierr);
259*56bd94f4SPierre Jolivet               ierr = MatDenseGetArrayRead(Ct, &c_ptr);CHKERRQ(ierr);
260*56bd94f4SPierre Jolivet               ierr = MatDenseGetArrayWrite(Dt, &d_ptr);CHKERRQ(ierr);
261*56bd94f4SPierre Jolivet               ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC);CHKERRQ(ierr);
262*56bd94f4SPierre Jolivet               ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD);CHKERRQ(ierr);
263*56bd94f4SPierre Jolivet               ierr = MatMult(E, cC, cD);CHKERRQ(ierr);
264*56bd94f4SPierre Jolivet               ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D and B %s %Dx%D\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1);
265*56bd94f4SPierre Jolivet               ierr = PetscLogStagePush(stage);CHKERRQ(ierr);
266*56bd94f4SPierre Jolivet               for (t = 0; t < trial; ++t) {
267*56bd94f4SPierre Jolivet                 ierr = MatMult(E, cC, cD);CHKERRQ(ierr);
268*56bd94f4SPierre Jolivet               }
269*56bd94f4SPierre Jolivet               ierr = PetscLogStagePop();CHKERRQ(ierr);
270*56bd94f4SPierre Jolivet               ierr = VecDestroy(&cD);CHKERRQ(ierr);
271*56bd94f4SPierre Jolivet               ierr = VecDestroy(&cC);CHKERRQ(ierr);
272*56bd94f4SPierre Jolivet               ierr = MatDestroy(&E);CHKERRQ(ierr);
273*56bd94f4SPierre Jolivet               ierr = MatDenseRestoreArrayWrite(Dt, &d_ptr);CHKERRQ(ierr);
274*56bd94f4SPierre Jolivet               ierr = MatDenseRestoreArrayRead(Ct, &c_ptr);CHKERRQ(ierr);
275*56bd94f4SPierre Jolivet               ierr = MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);CHKERRQ(ierr);
276*56bd94f4SPierre Jolivet               ierr = MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);CHKERRQ(ierr);
277*56bd94f4SPierre Jolivet             }
278*56bd94f4SPierre Jolivet           } else {
279*56bd94f4SPierre Jolivet             const PetscScalar *c_ptr;
280*56bd94f4SPierre Jolivet             PetscScalar       *d_ptr;
281*56bd94f4SPierre Jolivet 
282*56bd94f4SPierre Jolivet             ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr);
283*56bd94f4SPierre Jolivet             ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr);
284*56bd94f4SPierre Jolivet             PetscStackCallMKLSparse(mkl_sparse_d_mm,(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
285*56bd94f4SPierre Jolivet             ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN);
286*56bd94f4SPierre Jolivet             ierr = PetscLogStagePush(stage);CHKERRQ(ierr);
287*56bd94f4SPierre Jolivet             for (t = 0; t < trial; ++t) {
288*56bd94f4SPierre Jolivet               PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
289*56bd94f4SPierre Jolivet             }
290*56bd94f4SPierre Jolivet             ierr = PetscLogStagePop();CHKERRQ(ierr);
291*56bd94f4SPierre Jolivet             ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr);
292*56bd94f4SPierre Jolivet             ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr);
293*56bd94f4SPierre Jolivet           }
294*56bd94f4SPierre Jolivet         } else if (maij) {
295*56bd94f4SPierre Jolivet           ierr = MatDestroy(&C);CHKERRQ(ierr);
296*56bd94f4SPierre Jolivet           ierr = MatDestroy(&D);CHKERRQ(ierr);
297*56bd94f4SPierre Jolivet           continue;
298*56bd94f4SPierre Jolivet         } else if (!mkl) {
299*56bd94f4SPierre Jolivet           Vec cC, cD;
300*56bd94f4SPierre Jolivet 
301*56bd94f4SPierre Jolivet           ierr = MatDenseGetColumnVecRead(C, 0, &cC);CHKERRQ(ierr);
302*56bd94f4SPierre Jolivet           ierr = MatDenseGetColumnVecWrite(D, 0, &cD);CHKERRQ(ierr);
303*56bd94f4SPierre Jolivet           ierr = MatMult(A, cC, cD);CHKERRQ(ierr);
304*56bd94f4SPierre Jolivet           ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D\n", Atype, AM, AN);
305*56bd94f4SPierre Jolivet           ierr = PetscLogStagePush(stage);CHKERRQ(ierr);
306*56bd94f4SPierre Jolivet           for (t = 0; t < trial; ++t) {
307*56bd94f4SPierre Jolivet             ierr = MatMult(A, cC, cD);CHKERRQ(ierr);
308*56bd94f4SPierre Jolivet           }
309*56bd94f4SPierre Jolivet           ierr = PetscLogStagePop();CHKERRQ(ierr);
310*56bd94f4SPierre Jolivet           ierr = MatDenseRestoreColumnVecRead(C, 0, &cC);CHKERRQ(ierr);
311*56bd94f4SPierre Jolivet           ierr = MatDenseRestoreColumnVecWrite(D, 0, &cD);CHKERRQ(ierr);
312*56bd94f4SPierre Jolivet         } else {
313*56bd94f4SPierre Jolivet           const PetscScalar *c_ptr;
314*56bd94f4SPierre Jolivet           PetscScalar       *d_ptr;
315*56bd94f4SPierre Jolivet 
316*56bd94f4SPierre Jolivet           ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr);
317*56bd94f4SPierre Jolivet           ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr);
318*56bd94f4SPierre Jolivet           ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %Dx%D\n", Atype, AM, AN);
319*56bd94f4SPierre Jolivet           PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
320*56bd94f4SPierre Jolivet           ierr = PetscLogStagePush(stage);CHKERRQ(ierr);
321*56bd94f4SPierre Jolivet           for (t = 0; t < trial; ++t) {
322*56bd94f4SPierre Jolivet             PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
323*56bd94f4SPierre Jolivet           }
324*56bd94f4SPierre Jolivet           ierr = PetscLogStagePop();CHKERRQ(ierr);
325*56bd94f4SPierre Jolivet           ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr);
326*56bd94f4SPierre Jolivet           ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr);
327*56bd94f4SPierre Jolivet         }
328*56bd94f4SPierre Jolivet 
329*56bd94f4SPierre Jolivet         if (check) {
330*56bd94f4SPierre Jolivet           ierr = MatMatMultEqual(A, C, D, 10, &flg);CHKERRQ(ierr);
331*56bd94f4SPierre Jolivet           if (!flg) {
332*56bd94f4SPierre Jolivet             MatType Dtype;
333*56bd94f4SPierre Jolivet 
334*56bd94f4SPierre Jolivet             ierr = MatGetType(D, &Dtype);CHKERRQ(ierr);
335*56bd94f4SPierre Jolivet             ierr = PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);CHKERRQ(ierr);
336*56bd94f4SPierre Jolivet           }
337*56bd94f4SPierre Jolivet         }
338*56bd94f4SPierre Jolivet 
339*56bd94f4SPierre Jolivet         /* MKL implementation seems buggy for ABt */
340*56bd94f4SPierre Jolivet         /* A*Bt */
341*56bd94f4SPierre Jolivet         if (!mkl && trans && N[k] > 1) {
342*56bd94f4SPierre Jolivet           ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");CHKERRQ(ierr);
343*56bd94f4SPierre Jolivet           if (flg) {
344*56bd94f4SPierre Jolivet             ierr = MatTranspose(C, MAT_INPLACE_MATRIX, &C);CHKERRQ(ierr);
345*56bd94f4SPierre Jolivet             ierr = MatGetType(C, &Ctype);CHKERRQ(ierr);
346*56bd94f4SPierre Jolivet             if (!mkl) {
347*56bd94f4SPierre Jolivet               ierr = MatProductCreateWithMat(A, C, NULL, D);CHKERRQ(ierr);
348*56bd94f4SPierre Jolivet               ierr = MatProductSetType(D, MATPRODUCT_ABt);CHKERRQ(ierr);
349*56bd94f4SPierre Jolivet               ierr = MatProductSetFromOptions(D);CHKERRQ(ierr);
350*56bd94f4SPierre Jolivet               ierr = MatProductSymbolic(D);CHKERRQ(ierr);
351*56bd94f4SPierre Jolivet               ierr = MatProductNumeric(D);CHKERRQ(ierr);
352*56bd94f4SPierre Jolivet               ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and Bt %s %Dx%D\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN);
353*56bd94f4SPierre Jolivet               ierr = PetscLogStagePush(tstage);CHKERRQ(ierr);
354*56bd94f4SPierre Jolivet               for (t = 0; t < trial; ++t) {
355*56bd94f4SPierre Jolivet                 ierr = MatProductNumeric(D);CHKERRQ(ierr);
356*56bd94f4SPierre Jolivet               }
357*56bd94f4SPierre Jolivet               ierr = PetscLogStagePop();CHKERRQ(ierr);
358*56bd94f4SPierre Jolivet             } else {
359*56bd94f4SPierre Jolivet               const PetscScalar *c_ptr;
360*56bd94f4SPierre Jolivet               PetscScalar       *d_ptr;
361*56bd94f4SPierre Jolivet 
362*56bd94f4SPierre Jolivet               PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial));
363*56bd94f4SPierre Jolivet               PetscStackCallMKLSparse(mkl_sparse_optimize, (spr));
364*56bd94f4SPierre Jolivet               ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr);
365*56bd94f4SPierre Jolivet               ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr);
366*56bd94f4SPierre Jolivet               ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN);
367*56bd94f4SPierre Jolivet               PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
368*56bd94f4SPierre Jolivet               ierr = PetscLogStagePush(stage);CHKERRQ(ierr);
369*56bd94f4SPierre Jolivet               for (t = 0; t < trial; ++t) {
370*56bd94f4SPierre Jolivet                 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
371*56bd94f4SPierre Jolivet               }
372*56bd94f4SPierre Jolivet               ierr = PetscLogStagePop();CHKERRQ(ierr);
373*56bd94f4SPierre Jolivet               ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr);
374*56bd94f4SPierre Jolivet               ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr);
375*56bd94f4SPierre Jolivet             }
376*56bd94f4SPierre Jolivet           }
377*56bd94f4SPierre Jolivet         }
378*56bd94f4SPierre Jolivet 
379*56bd94f4SPierre Jolivet         if (!mkl && trans && N[k] > 1 && flg && check) {
380*56bd94f4SPierre Jolivet           ierr = MatMatTransposeMultEqual(A, C, D, 10, &flg);CHKERRQ(ierr);
381*56bd94f4SPierre Jolivet           if (!flg) {
382*56bd94f4SPierre Jolivet             MatType Dtype;
383*56bd94f4SPierre Jolivet             ierr = MatGetType(D, &Dtype);CHKERRQ(ierr);
384*56bd94f4SPierre Jolivet             ierr = PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);CHKERRQ(ierr);
385*56bd94f4SPierre Jolivet           }
386*56bd94f4SPierre Jolivet         }
387*56bd94f4SPierre Jolivet         ierr = MatDestroy(&C);CHKERRQ(ierr);
388*56bd94f4SPierre Jolivet         ierr = MatDestroy(&D);CHKERRQ(ierr);
389*56bd94f4SPierre Jolivet       }
390*56bd94f4SPierre Jolivet       if (mkl) {
391*56bd94f4SPierre Jolivet         PetscStackCallMKLSparse(mkl_sparse_destroy, (spr));
392*56bd94f4SPierre Jolivet         ierr = PetscFree(ia_ptr);CHKERRQ(ierr);
393*56bd94f4SPierre Jolivet         ierr = PetscFree(ja_ptr);CHKERRQ(ierr);
394*56bd94f4SPierre Jolivet       }
395*56bd94f4SPierre Jolivet       if (cuda && i != ntype - 1) {
396*56bd94f4SPierre Jolivet         ierr = PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n");CHKERRQ(ierr);
397*56bd94f4SPierre Jolivet         break;
398*56bd94f4SPierre Jolivet       }
399*56bd94f4SPierre Jolivet     }
400*56bd94f4SPierre Jolivet     if(E != A) {
401*56bd94f4SPierre Jolivet       ierr = MatDestroy(&E);CHKERRQ(ierr);
402*56bd94f4SPierre Jolivet     }
403*56bd94f4SPierre Jolivet     ierr = MatDestroy(&A);CHKERRQ(ierr);
404*56bd94f4SPierre Jolivet   }
405*56bd94f4SPierre Jolivet   for (m = 0; m < ntype; ++m) {
406*56bd94f4SPierre Jolivet     ierr = PetscFree(type[m]);CHKERRQ(ierr);
407*56bd94f4SPierre Jolivet   }
408*56bd94f4SPierre Jolivet   ierr = PetscFinalize();
409*56bd94f4SPierre Jolivet   return 0;
410*56bd94f4SPierre Jolivet }
411*56bd94f4SPierre Jolivet 
412*56bd94f4SPierre Jolivet /*TEST
413*56bd94f4SPierre Jolivet 
414*56bd94f4SPierre Jolivet    testset:
415*56bd94f4SPierre Jolivet      nsize: 1
416*56bd94f4SPierre Jolivet      requires: double !complex !define(PETSC_USE_64BIT_INDICES)
417*56bd94f4SPierre Jolivet      filter: sed "/Benchmarking/d"
418*56bd94f4SPierre Jolivet      args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output}
419*56bd94f4SPierre Jolivet      test:
420*56bd94f4SPierre Jolivet        suffix: basic
421*56bd94f4SPierre Jolivet        args: -type aij,sbaij,baij
422*56bd94f4SPierre Jolivet        output_file: output/ex237.out
423*56bd94f4SPierre Jolivet      test:
424*56bd94f4SPierre Jolivet        suffix: maij
425*56bd94f4SPierre Jolivet        args: -type aij,maij
426*56bd94f4SPierre Jolivet        output_file: output/ex237.out
427*56bd94f4SPierre Jolivet      test:
428*56bd94f4SPierre Jolivet        suffix: cuda
429*56bd94f4SPierre Jolivet        requires: cuda
430*56bd94f4SPierre Jolivet        args: -type aij,aijcusparse
431*56bd94f4SPierre Jolivet        output_file: output/ex237.out
432*56bd94f4SPierre Jolivet      test:
433*56bd94f4SPierre Jolivet        suffix: mkl
434*56bd94f4SPierre Jolivet        requires: mkl
435*56bd94f4SPierre Jolivet        args: -type aij,aijmkl,baijmkl,sbaijmkl
436*56bd94f4SPierre Jolivet        output_file: output/ex237.out
437*56bd94f4SPierre Jolivet 
438*56bd94f4SPierre Jolivet TEST*/
439