xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision ae1ee55146a7ad071171b861759b85940c7e5c67)
151d8ba46SPierre Jolivet #include <petscsys.h>
251d8ba46SPierre Jolivet #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
351d8ba46SPierre Jolivet #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4d305a81bSVasiliy Kozyrev 
52475b7caSBarry Smith #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
669ab9685SBarry Smith   #define MKL_ILP64
769ab9685SBarry Smith #endif
8d305a81bSVasiliy Kozyrev #include <mkl.h>
9d305a81bSVasiliy Kozyrev #include <mkl_cluster_sparse_solver.h>
10d305a81bSVasiliy Kozyrev 
11d305a81bSVasiliy Kozyrev /*
12d305a81bSVasiliy Kozyrev  *  Possible mkl_cpardiso phases that controls the execution of the solver.
13d305a81bSVasiliy Kozyrev  *  For more information check mkl_cpardiso manual.
14d305a81bSVasiliy Kozyrev  */
15d305a81bSVasiliy Kozyrev #define JOB_ANALYSIS                                                    11
16db1f124cSVasiliy Kozyrev #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION                            12
17db1f124cSVasiliy Kozyrev #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
18d305a81bSVasiliy Kozyrev #define JOB_NUMERICAL_FACTORIZATION                                     22
19db1f124cSVasiliy Kozyrev #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT          23
20d305a81bSVasiliy Kozyrev #define JOB_SOLVE_ITERATIVE_REFINEMENT                                  33
21db1f124cSVasiliy Kozyrev #define JOB_SOLVE_FORWARD_SUBSTITUTION                                  331
22db1f124cSVasiliy Kozyrev #define JOB_SOLVE_DIAGONAL_SUBSTITUTION                                 332
23db1f124cSVasiliy Kozyrev #define JOB_SOLVE_BACKWARD_SUBSTITUTION                                 333
24db1f124cSVasiliy Kozyrev #define JOB_RELEASE_OF_LU_MEMORY                                        0
25d305a81bSVasiliy Kozyrev #define JOB_RELEASE_OF_ALL_MEMORY                                       -1
26d305a81bSVasiliy Kozyrev 
27d305a81bSVasiliy Kozyrev #define IPARM_SIZE 64
28d305a81bSVasiliy Kozyrev #define INT_TYPE   MKL_INT
29d305a81bSVasiliy Kozyrev 
30d71ae5a4SJacob Faibussowitsch static const char *Err_MSG_CPardiso(int errNo)
31d71ae5a4SJacob Faibussowitsch {
32d305a81bSVasiliy Kozyrev   switch (errNo) {
33d71ae5a4SJacob Faibussowitsch   case -1:
34d71ae5a4SJacob Faibussowitsch     return "input inconsistent";
35d71ae5a4SJacob Faibussowitsch     break;
36d71ae5a4SJacob Faibussowitsch   case -2:
37d71ae5a4SJacob Faibussowitsch     return "not enough memory";
38d71ae5a4SJacob Faibussowitsch     break;
39d71ae5a4SJacob Faibussowitsch   case -3:
40d71ae5a4SJacob Faibussowitsch     return "reordering problem";
41d71ae5a4SJacob Faibussowitsch     break;
42d71ae5a4SJacob Faibussowitsch   case -4:
43d71ae5a4SJacob Faibussowitsch     return "zero pivot, numerical factorization or iterative refinement problem";
44d71ae5a4SJacob Faibussowitsch     break;
45d71ae5a4SJacob Faibussowitsch   case -5:
46d71ae5a4SJacob Faibussowitsch     return "unclassified (internal) error";
47d71ae5a4SJacob Faibussowitsch     break;
48d71ae5a4SJacob Faibussowitsch   case -6:
49d71ae5a4SJacob Faibussowitsch     return "preordering failed (matrix types 11, 13 only)";
50d71ae5a4SJacob Faibussowitsch     break;
51d71ae5a4SJacob Faibussowitsch   case -7:
52d71ae5a4SJacob Faibussowitsch     return "diagonal matrix problem";
53d71ae5a4SJacob Faibussowitsch     break;
54d71ae5a4SJacob Faibussowitsch   case -8:
55d71ae5a4SJacob Faibussowitsch     return "32-bit integer overflow problem";
56d71ae5a4SJacob Faibussowitsch     break;
57d71ae5a4SJacob Faibussowitsch   case -9:
58d71ae5a4SJacob Faibussowitsch     return "not enough memory for OOC";
59d71ae5a4SJacob Faibussowitsch     break;
60d71ae5a4SJacob Faibussowitsch   case -10:
61d71ae5a4SJacob Faibussowitsch     return "problems with opening OOC temporary files";
62d71ae5a4SJacob Faibussowitsch     break;
63d71ae5a4SJacob Faibussowitsch   case -11:
64d71ae5a4SJacob Faibussowitsch     return "read/write problems with the OOC data file";
65d71ae5a4SJacob Faibussowitsch     break;
66d71ae5a4SJacob Faibussowitsch   default:
67d71ae5a4SJacob Faibussowitsch     return "unknown error";
68d305a81bSVasiliy Kozyrev   }
69d305a81bSVasiliy Kozyrev }
70d305a81bSVasiliy Kozyrev 
71aaaa354bSBarry Smith #define PetscCallCluster(f) PetscStackCallExternalVoid("cluster_sparse_solver", f);
72aaaa354bSBarry Smith 
73d305a81bSVasiliy Kozyrev /*
74d305a81bSVasiliy Kozyrev  *  Internal data structure.
75d305a81bSVasiliy Kozyrev  *  For more information check mkl_cpardiso manual.
76d305a81bSVasiliy Kozyrev  */
77d305a81bSVasiliy Kozyrev 
78d305a81bSVasiliy Kozyrev typedef struct {
79d305a81bSVasiliy Kozyrev   /* Configuration vector */
80d305a81bSVasiliy Kozyrev   INT_TYPE iparm[IPARM_SIZE];
81d305a81bSVasiliy Kozyrev 
82d305a81bSVasiliy Kozyrev   /*
83d305a81bSVasiliy Kozyrev    * Internal mkl_cpardiso memory location.
84d305a81bSVasiliy Kozyrev    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
85d305a81bSVasiliy Kozyrev    */
86d305a81bSVasiliy Kozyrev   void *pt[IPARM_SIZE];
87d305a81bSVasiliy Kozyrev 
88ae02b5cfSPierre Jolivet   MPI_Fint comm_mkl_cpardiso;
89d305a81bSVasiliy Kozyrev 
90d305a81bSVasiliy Kozyrev   /* Basic mkl_cpardiso info*/
91d305a81bSVasiliy Kozyrev   INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
92d305a81bSVasiliy Kozyrev 
930b4b7b1cSBarry Smith   /* Matrix values and matrix nonzero structure */
94d305a81bSVasiliy Kozyrev   PetscScalar *a;
95d305a81bSVasiliy Kozyrev 
96d305a81bSVasiliy Kozyrev   INT_TYPE *ia, *ja;
97d305a81bSVasiliy Kozyrev 
98d305a81bSVasiliy Kozyrev   /* Number of non-zero elements */
99d305a81bSVasiliy Kozyrev   INT_TYPE nz;
100d305a81bSVasiliy Kozyrev 
101d305a81bSVasiliy Kozyrev   /* Row permutaton vector*/
102d305a81bSVasiliy Kozyrev   INT_TYPE *perm;
103d305a81bSVasiliy Kozyrev 
104a678f235SPierre Jolivet   /* Define is matrix preserve sparse structure. */
105d305a81bSVasiliy Kozyrev   MatStructure matstruc;
106d305a81bSVasiliy Kozyrev 
107d305a81bSVasiliy Kozyrev   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt *, PetscInt **, PetscInt **, PetscScalar **);
108d305a81bSVasiliy Kozyrev 
109d305a81bSVasiliy Kozyrev   /* True if mkl_cpardiso function have been used. */
110d305a81bSVasiliy Kozyrev   PetscBool CleanUp;
111d305a81bSVasiliy Kozyrev } Mat_MKL_CPARDISO;
112d305a81bSVasiliy Kozyrev 
113d305a81bSVasiliy Kozyrev /*
114d305a81bSVasiliy Kozyrev  * Copy the elements of matrix A.
115d305a81bSVasiliy Kozyrev  * Input:
116d305a81bSVasiliy Kozyrev  *   - Mat A: MATSEQAIJ matrix
117d305a81bSVasiliy Kozyrev  *   - int shift: matrix index.
118d305a81bSVasiliy Kozyrev  *     - 0 for c representation
119d305a81bSVasiliy Kozyrev  *     - 1 for fortran representation
120d305a81bSVasiliy Kozyrev  *   - MatReuse reuse:
121d305a81bSVasiliy Kozyrev  *     - MAT_INITIAL_MATRIX: Create a new aij representation
122d305a81bSVasiliy Kozyrev  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
123d305a81bSVasiliy Kozyrev  * Output:
124d305a81bSVasiliy Kozyrev  *   - int *nnz: Number of nonzero-elements.
125d305a81bSVasiliy Kozyrev  *   - int **r pointer to i index
126d305a81bSVasiliy Kozyrev  *   - int **c pointer to j elements
127d305a81bSVasiliy Kozyrev  *   - MATRIXTYPE **v: Non-zero elements
128d305a81bSVasiliy Kozyrev  */
12966976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
130d71ae5a4SJacob Faibussowitsch {
131d305a81bSVasiliy Kozyrev   Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data;
1322b26979fSBarry Smith 
133d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
134d305a81bSVasiliy Kozyrev   *v = aa->a;
135d305a81bSVasiliy Kozyrev   if (reuse == MAT_INITIAL_MATRIX) {
136d305a81bSVasiliy Kozyrev     *r   = (INT_TYPE *)aa->i;
137d305a81bSVasiliy Kozyrev     *c   = (INT_TYPE *)aa->j;
138d305a81bSVasiliy Kozyrev     *nnz = aa->nz;
139d305a81bSVasiliy Kozyrev   }
1403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
141d305a81bSVasiliy Kozyrev }
142d305a81bSVasiliy Kozyrev 
14366976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
144d71ae5a4SJacob Faibussowitsch {
145d305a81bSVasiliy Kozyrev   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
146566a9afdSBarry Smith   PetscInt           rstart, nz, i, j, countA, countB;
147d305a81bSVasiliy Kozyrev   PetscInt          *row, *col;
148566a9afdSBarry Smith   const PetscScalar *av, *bv;
149d305a81bSVasiliy Kozyrev   PetscScalar       *val;
150d305a81bSVasiliy Kozyrev   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
151f4f49eeaSPierre Jolivet   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ *)mat->A->data;
152f4f49eeaSPierre Jolivet   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ *)mat->B->data;
153566a9afdSBarry Smith   PetscInt           colA_start, jB, jcol;
154d305a81bSVasiliy Kozyrev 
155d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
1569371c9d4SSatish Balay   ai     = aa->i;
1579371c9d4SSatish Balay   aj     = aa->j;
1589371c9d4SSatish Balay   bi     = bb->i;
1599371c9d4SSatish Balay   bj     = bb->j;
1609371c9d4SSatish Balay   rstart = A->rmap->rstart;
1619371c9d4SSatish Balay   av     = aa->a;
1629371c9d4SSatish Balay   bv     = bb->a;
163d305a81bSVasiliy Kozyrev 
164d305a81bSVasiliy Kozyrev   garray = mat->garray;
165d305a81bSVasiliy Kozyrev 
166d305a81bSVasiliy Kozyrev   if (reuse == MAT_INITIAL_MATRIX) {
167d305a81bSVasiliy Kozyrev     nz   = aa->nz + bb->nz;
168d305a81bSVasiliy Kozyrev     *nnz = nz;
1699566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(m + 1, &row, nz, &col, nz, &val));
1709371c9d4SSatish Balay     *r = row;
1719371c9d4SSatish Balay     *c = col;
1729371c9d4SSatish Balay     *v = val;
173d305a81bSVasiliy Kozyrev   } else {
1749371c9d4SSatish Balay     row = *r;
1759371c9d4SSatish Balay     col = *c;
1769371c9d4SSatish Balay     val = *v;
177d305a81bSVasiliy Kozyrev   }
178d305a81bSVasiliy Kozyrev 
179d305a81bSVasiliy Kozyrev   nz = 0;
180d305a81bSVasiliy Kozyrev   for (i = 0; i < m; i++) {
181d305a81bSVasiliy Kozyrev     row[i] = nz;
182d305a81bSVasiliy Kozyrev     countA = ai[i + 1] - ai[i];
183d305a81bSVasiliy Kozyrev     countB = bi[i + 1] - bi[i];
184d305a81bSVasiliy Kozyrev     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
185d305a81bSVasiliy Kozyrev     bjj    = bj + bi[i];
186d305a81bSVasiliy Kozyrev 
187d305a81bSVasiliy Kozyrev     /* B part, smaller col index */
188d305a81bSVasiliy Kozyrev     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
189d305a81bSVasiliy Kozyrev     jB         = 0;
190d305a81bSVasiliy Kozyrev     for (j = 0; j < countB; j++) {
191d305a81bSVasiliy Kozyrev       jcol = garray[bjj[j]];
19251d8ba46SPierre Jolivet       if (jcol > colA_start) break;
193d305a81bSVasiliy Kozyrev       col[nz]   = jcol;
194d305a81bSVasiliy Kozyrev       val[nz++] = *bv++;
195d305a81bSVasiliy Kozyrev     }
19651d8ba46SPierre Jolivet     jB = j;
197d305a81bSVasiliy Kozyrev 
198d305a81bSVasiliy Kozyrev     /* A part */
199d305a81bSVasiliy Kozyrev     for (j = 0; j < countA; j++) {
200d305a81bSVasiliy Kozyrev       col[nz]   = rstart + ajj[j];
201d305a81bSVasiliy Kozyrev       val[nz++] = *av++;
202d305a81bSVasiliy Kozyrev     }
203d305a81bSVasiliy Kozyrev 
204d305a81bSVasiliy Kozyrev     /* B part, larger col index */
205d305a81bSVasiliy Kozyrev     for (j = jB; j < countB; j++) {
206d305a81bSVasiliy Kozyrev       col[nz]   = garray[bjj[j]];
207d305a81bSVasiliy Kozyrev       val[nz++] = *bv++;
208d305a81bSVasiliy Kozyrev     }
209d305a81bSVasiliy Kozyrev   }
210d305a81bSVasiliy Kozyrev   row[m] = nz;
2113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
212d305a81bSVasiliy Kozyrev }
213d305a81bSVasiliy Kozyrev 
21466976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
215d71ae5a4SJacob Faibussowitsch {
21651d8ba46SPierre Jolivet   const PetscInt    *ai, *aj, *bi, *bj, *garray, bs = A->rmap->bs, bs2 = bs * bs, m = A->rmap->n / bs, *ajj, *bjj;
21751d8ba46SPierre Jolivet   PetscInt           rstart, nz, i, j, countA, countB;
21851d8ba46SPierre Jolivet   PetscInt          *row, *col;
21951d8ba46SPierre Jolivet   const PetscScalar *av, *bv;
22051d8ba46SPierre Jolivet   PetscScalar       *val;
22151d8ba46SPierre Jolivet   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
222f4f49eeaSPierre Jolivet   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
223f4f49eeaSPierre Jolivet   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
22451d8ba46SPierre Jolivet   PetscInt           colA_start, jB, jcol;
22551d8ba46SPierre Jolivet 
22651d8ba46SPierre Jolivet   PetscFunctionBegin;
2279371c9d4SSatish Balay   ai     = aa->i;
2289371c9d4SSatish Balay   aj     = aa->j;
2299371c9d4SSatish Balay   bi     = bb->i;
2309371c9d4SSatish Balay   bj     = bb->j;
2319371c9d4SSatish Balay   rstart = A->rmap->rstart / bs;
2329371c9d4SSatish Balay   av     = aa->a;
2339371c9d4SSatish Balay   bv     = bb->a;
23451d8ba46SPierre Jolivet 
23551d8ba46SPierre Jolivet   garray = mat->garray;
23651d8ba46SPierre Jolivet 
23751d8ba46SPierre Jolivet   if (reuse == MAT_INITIAL_MATRIX) {
23851d8ba46SPierre Jolivet     nz   = aa->nz + bb->nz;
23951d8ba46SPierre Jolivet     *nnz = nz;
2409566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(m + 1, &row, nz, &col, nz * bs2, &val));
2419371c9d4SSatish Balay     *r = row;
2429371c9d4SSatish Balay     *c = col;
2439371c9d4SSatish Balay     *v = val;
24451d8ba46SPierre Jolivet   } else {
2459371c9d4SSatish Balay     row = *r;
2469371c9d4SSatish Balay     col = *c;
2479371c9d4SSatish Balay     val = *v;
24851d8ba46SPierre Jolivet   }
24951d8ba46SPierre Jolivet 
25051d8ba46SPierre Jolivet   nz = 0;
25151d8ba46SPierre Jolivet   for (i = 0; i < m; i++) {
25251d8ba46SPierre Jolivet     row[i] = nz + 1;
25351d8ba46SPierre Jolivet     countA = ai[i + 1] - ai[i];
25451d8ba46SPierre Jolivet     countB = bi[i + 1] - bi[i];
25551d8ba46SPierre Jolivet     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
25651d8ba46SPierre Jolivet     bjj    = bj + bi[i];
25751d8ba46SPierre Jolivet 
25851d8ba46SPierre Jolivet     /* B part, smaller col index */
25951d8ba46SPierre Jolivet     colA_start = rstart + (countA > 0 ? ajj[0] : 0); /* the smallest global col index of A */
26051d8ba46SPierre Jolivet     jB         = 0;
26151d8ba46SPierre Jolivet     for (j = 0; j < countB; j++) {
26251d8ba46SPierre Jolivet       jcol = garray[bjj[j]];
26351d8ba46SPierre Jolivet       if (jcol > colA_start) break;
26451d8ba46SPierre Jolivet       col[nz++] = jcol + 1;
26551d8ba46SPierre Jolivet     }
26651d8ba46SPierre Jolivet     jB = j;
2679566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(val, bv, jB * bs2));
26851d8ba46SPierre Jolivet     val += jB * bs2;
26951d8ba46SPierre Jolivet     bv += jB * bs2;
27051d8ba46SPierre Jolivet 
27151d8ba46SPierre Jolivet     /* A part */
27251d8ba46SPierre Jolivet     for (j = 0; j < countA; j++) col[nz++] = rstart + ajj[j] + 1;
2739566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(val, av, countA * bs2));
27451d8ba46SPierre Jolivet     val += countA * bs2;
27551d8ba46SPierre Jolivet     av += countA * bs2;
27651d8ba46SPierre Jolivet 
27751d8ba46SPierre Jolivet     /* B part, larger col index */
27851d8ba46SPierre Jolivet     for (j = jB; j < countB; j++) col[nz++] = garray[bjj[j]] + 1;
2799566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(val, bv, (countB - jB) * bs2));
28051d8ba46SPierre Jolivet     val += (countB - jB) * bs2;
28151d8ba46SPierre Jolivet     bv += (countB - jB) * bs2;
28251d8ba46SPierre Jolivet   }
28351d8ba46SPierre Jolivet   row[m] = nz + 1;
2843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28551d8ba46SPierre Jolivet }
28651d8ba46SPierre Jolivet 
28766976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
288d71ae5a4SJacob Faibussowitsch {
28951d8ba46SPierre Jolivet   const PetscInt    *ai, *aj, *bi, *bj, *garray, bs = A->rmap->bs, bs2 = bs * bs, m = A->rmap->n / bs, *ajj, *bjj;
29051d8ba46SPierre Jolivet   PetscInt           rstart, nz, i, j, countA, countB;
29151d8ba46SPierre Jolivet   PetscInt          *row, *col;
29251d8ba46SPierre Jolivet   const PetscScalar *av, *bv;
29351d8ba46SPierre Jolivet   PetscScalar       *val;
29451d8ba46SPierre Jolivet   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
295f4f49eeaSPierre Jolivet   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
296f4f49eeaSPierre Jolivet   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
29751d8ba46SPierre Jolivet 
29851d8ba46SPierre Jolivet   PetscFunctionBegin;
2999371c9d4SSatish Balay   ai     = aa->i;
3009371c9d4SSatish Balay   aj     = aa->j;
3019371c9d4SSatish Balay   bi     = bb->i;
3029371c9d4SSatish Balay   bj     = bb->j;
3039371c9d4SSatish Balay   rstart = A->rmap->rstart / bs;
3049371c9d4SSatish Balay   av     = aa->a;
3059371c9d4SSatish Balay   bv     = bb->a;
30651d8ba46SPierre Jolivet 
30751d8ba46SPierre Jolivet   garray = mat->garray;
30851d8ba46SPierre Jolivet 
30951d8ba46SPierre Jolivet   if (reuse == MAT_INITIAL_MATRIX) {
31051d8ba46SPierre Jolivet     nz   = aa->nz + bb->nz;
31151d8ba46SPierre Jolivet     *nnz = nz;
3129566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(m + 1, &row, nz, &col, nz * bs2, &val));
3139371c9d4SSatish Balay     *r = row;
3149371c9d4SSatish Balay     *c = col;
3159371c9d4SSatish Balay     *v = val;
31651d8ba46SPierre Jolivet   } else {
3179371c9d4SSatish Balay     row = *r;
3189371c9d4SSatish Balay     col = *c;
3199371c9d4SSatish Balay     val = *v;
32051d8ba46SPierre Jolivet   }
32151d8ba46SPierre Jolivet 
32251d8ba46SPierre Jolivet   nz = 0;
32351d8ba46SPierre Jolivet   for (i = 0; i < m; i++) {
32451d8ba46SPierre Jolivet     row[i] = nz + 1;
32551d8ba46SPierre Jolivet     countA = ai[i + 1] - ai[i];
32651d8ba46SPierre Jolivet     countB = bi[i + 1] - bi[i];
32751d8ba46SPierre Jolivet     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
32851d8ba46SPierre Jolivet     bjj    = bj + bi[i];
32951d8ba46SPierre Jolivet 
33051d8ba46SPierre Jolivet     /* A part */
33151d8ba46SPierre Jolivet     for (j = 0; j < countA; j++) col[nz++] = rstart + ajj[j] + 1;
3329566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(val, av, countA * bs2));
33351d8ba46SPierre Jolivet     val += countA * bs2;
33451d8ba46SPierre Jolivet     av += countA * bs2;
33551d8ba46SPierre Jolivet 
33651d8ba46SPierre Jolivet     /* B part, larger col index */
33751d8ba46SPierre Jolivet     for (j = 0; j < countB; j++) col[nz++] = garray[bjj[j]] + 1;
3389566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(val, bv, countB * bs2));
33951d8ba46SPierre Jolivet     val += countB * bs2;
34051d8ba46SPierre Jolivet     bv += countB * bs2;
34151d8ba46SPierre Jolivet   }
34251d8ba46SPierre Jolivet   row[m] = nz + 1;
3433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
34451d8ba46SPierre Jolivet }
34551d8ba46SPierre Jolivet 
346d305a81bSVasiliy Kozyrev /*
347d305a81bSVasiliy Kozyrev  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
348d305a81bSVasiliy Kozyrev  */
34966976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
350d71ae5a4SJacob Faibussowitsch {
351660873c6SBarry Smith   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data;
352ae02b5cfSPierre Jolivet   MPI_Comm          comm;
353d305a81bSVasiliy Kozyrev 
354d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
355d305a81bSVasiliy Kozyrev   /* Terminate instance, deallocate memories */
356d305a81bSVasiliy Kozyrev   if (mat_mkl_cpardiso->CleanUp) {
357d305a81bSVasiliy Kozyrev     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
358d305a81bSVasiliy Kozyrev 
359aaaa354bSBarry Smith     PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, NULL, NULL, NULL, mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs,
360aaaa354bSBarry Smith                                            mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, NULL, NULL, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err));
361d305a81bSVasiliy Kozyrev   }
36248a46eb9SPierre Jolivet   if (mat_mkl_cpardiso->ConvertToTriples != MatCopy_seqaij_seqaij_MKL_CPARDISO) PetscCall(PetscFree3(mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja, mat_mkl_cpardiso->a));
363ae02b5cfSPierre Jolivet   comm = MPI_Comm_f2c(mat_mkl_cpardiso->comm_mkl_cpardiso);
3649566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free(&comm));
3659566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
366d305a81bSVasiliy Kozyrev 
367d305a81bSVasiliy Kozyrev   /* clear composed functions */
3689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
3699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMkl_CPardisoSetCntl_C", NULL));
3703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
371d305a81bSVasiliy Kozyrev }
372d305a81bSVasiliy Kozyrev 
373d305a81bSVasiliy Kozyrev /*
374d305a81bSVasiliy Kozyrev  * Computes Ax = b
375d305a81bSVasiliy Kozyrev  */
37666976f2fSJacob Faibussowitsch static PetscErrorCode MatSolve_MKL_CPARDISO(Mat A, Vec b, Vec x)
377d71ae5a4SJacob Faibussowitsch {
378bd5dbebeSNils Friess   Mat_MKL_CPARDISO  *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data;
379d9ca1df4SBarry Smith   PetscScalar       *xarray;
380d9ca1df4SBarry Smith   const PetscScalar *barray;
381d305a81bSVasiliy Kozyrev 
382d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
383d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->nrhs = 1;
3849566063dSJacob Faibussowitsch   PetscCall(VecGetArray(x, &xarray));
3859566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(b, &barray));
386d305a81bSVasiliy Kozyrev 
387d305a81bSVasiliy Kozyrev   /* solve phase */
388d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
389aaaa354bSBarry Smith   PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja,
390aaaa354bSBarry Smith                                          mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err));
3911d27aa22SBarry Smith   PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\". Please check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err));
392d305a81bSVasiliy Kozyrev 
3939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(x, &xarray));
3949566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(b, &barray));
395d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
3963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
397d305a81bSVasiliy Kozyrev }
398d305a81bSVasiliy Kozyrev 
399bd5dbebeSNils Friess static PetscErrorCode MatForwardSolve_MKL_CPARDISO(Mat A, Vec b, Vec x)
400bd5dbebeSNils Friess {
401bd5dbebeSNils Friess   Mat_MKL_CPARDISO  *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data;
402bd5dbebeSNils Friess   PetscScalar       *xarray;
403bd5dbebeSNils Friess   const PetscScalar *barray;
404bd5dbebeSNils Friess 
405bd5dbebeSNils Friess   PetscFunctionBegin;
406bd5dbebeSNils Friess   mat_mkl_cpardiso->nrhs = 1;
407bd5dbebeSNils Friess   PetscCall(VecGetArray(x, &xarray));
408bd5dbebeSNils Friess   PetscCall(VecGetArrayRead(b, &barray));
409bd5dbebeSNils Friess 
410bd5dbebeSNils Friess   /* solve phase */
411bd5dbebeSNils Friess   mat_mkl_cpardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION;
412aaaa354bSBarry Smith   PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja,
413aaaa354bSBarry Smith                                          mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err));
414bd5dbebeSNils Friess   PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\". Please check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err));
415bd5dbebeSNils Friess 
416bd5dbebeSNils Friess   PetscCall(VecRestoreArray(x, &xarray));
417bd5dbebeSNils Friess   PetscCall(VecRestoreArrayRead(b, &barray));
418bd5dbebeSNils Friess   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
419bd5dbebeSNils Friess   PetscFunctionReturn(PETSC_SUCCESS);
420bd5dbebeSNils Friess }
421bd5dbebeSNils Friess 
422bd5dbebeSNils Friess static PetscErrorCode MatBackwardSolve_MKL_CPARDISO(Mat A, Vec b, Vec x)
423bd5dbebeSNils Friess {
424bd5dbebeSNils Friess   Mat_MKL_CPARDISO  *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data;
425bd5dbebeSNils Friess   PetscScalar       *xarray;
426bd5dbebeSNils Friess   const PetscScalar *barray;
427bd5dbebeSNils Friess 
428bd5dbebeSNils Friess   PetscFunctionBegin;
429bd5dbebeSNils Friess   mat_mkl_cpardiso->nrhs = 1;
430bd5dbebeSNils Friess   PetscCall(VecGetArray(x, &xarray));
431bd5dbebeSNils Friess   PetscCall(VecGetArrayRead(b, &barray));
432bd5dbebeSNils Friess 
433bd5dbebeSNils Friess   /* solve phase */
434bd5dbebeSNils Friess   mat_mkl_cpardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION;
435aaaa354bSBarry Smith   PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja,
436aaaa354bSBarry Smith                                          mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err));
437bd5dbebeSNils Friess   PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\". Please check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err));
438bd5dbebeSNils Friess 
439bd5dbebeSNils Friess   PetscCall(VecRestoreArray(x, &xarray));
440bd5dbebeSNils Friess   PetscCall(VecRestoreArrayRead(b, &barray));
441bd5dbebeSNils Friess   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
442bd5dbebeSNils Friess   PetscFunctionReturn(PETSC_SUCCESS);
443bd5dbebeSNils Friess }
444bd5dbebeSNils Friess 
44566976f2fSJacob Faibussowitsch static PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A, Vec b, Vec x)
446d71ae5a4SJacob Faibussowitsch {
447660873c6SBarry Smith   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data;
448d305a81bSVasiliy Kozyrev 
449d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
450*fc2fb351SPierre Jolivet   mat_mkl_cpardiso->iparm[12 - 1] = PetscDefined(USE_COMPLEX) ? 1 : 2;
4519566063dSJacob Faibussowitsch   PetscCall(MatSolve_MKL_CPARDISO(A, b, x));
452d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->iparm[12 - 1] = 0;
4533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
454d305a81bSVasiliy Kozyrev }
455d305a81bSVasiliy Kozyrev 
45666976f2fSJacob Faibussowitsch static PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A, Mat B, Mat X)
457d71ae5a4SJacob Faibussowitsch {
458bd5dbebeSNils Friess   Mat_MKL_CPARDISO  *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data;
4598572280aSBarry Smith   PetscScalar       *xarray;
460680d2628SBarry Smith   const PetscScalar *barray;
461d305a81bSVasiliy Kozyrev 
462d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
4639566063dSJacob Faibussowitsch   PetscCall(MatGetSize(B, NULL, (PetscInt *)&mat_mkl_cpardiso->nrhs));
464d305a81bSVasiliy Kozyrev 
465d305a81bSVasiliy Kozyrev   if (mat_mkl_cpardiso->nrhs > 0) {
4669566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArrayRead(B, &barray));
4679566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(X, &xarray));
468d305a81bSVasiliy Kozyrev 
46908401ef6SPierre Jolivet     PetscCheck(barray != xarray, PETSC_COMM_SELF, PETSC_ERR_SUP, "B and X cannot share the same memory location");
470f952d8e8SHong Zhang 
471d305a81bSVasiliy Kozyrev     /* solve phase */
472d305a81bSVasiliy Kozyrev     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
473aaaa354bSBarry Smith     PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja,
474aaaa354bSBarry Smith                                            mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err));
4751d27aa22SBarry Smith     PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\". Please check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err));
4769566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArrayRead(B, &barray));
4779566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(X, &xarray));
478d305a81bSVasiliy Kozyrev   }
479d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
4803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
481d305a81bSVasiliy Kozyrev }
482d305a81bSVasiliy Kozyrev 
483d305a81bSVasiliy Kozyrev /*
484d305a81bSVasiliy Kozyrev  * LU Decomposition
485d305a81bSVasiliy Kozyrev  */
48666976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F, Mat A, const MatFactorInfo *info)
487d71ae5a4SJacob Faibussowitsch {
488bd5dbebeSNils Friess   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data;
489d305a81bSVasiliy Kozyrev 
490d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
491d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
4929566063dSJacob Faibussowitsch   PetscCall((*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a));
493d305a81bSVasiliy Kozyrev 
494d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
495aaaa354bSBarry Smith   PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja,
496aaaa354bSBarry Smith                                          mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, NULL, NULL, &mat_mkl_cpardiso->comm_mkl_cpardiso, &mat_mkl_cpardiso->err));
4971d27aa22SBarry Smith   PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\". Please check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err));
498d305a81bSVasiliy Kozyrev 
499d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
500d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
5013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
502d305a81bSVasiliy Kozyrev }
503d305a81bSVasiliy Kozyrev 
504d305a81bSVasiliy Kozyrev /* Sets mkl_cpardiso options from the options database */
50566976f2fSJacob Faibussowitsch static PetscErrorCode MatSetFromOptions_MKL_CPARDISO(Mat F, Mat A)
506d71ae5a4SJacob Faibussowitsch {
507660873c6SBarry Smith   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data;
5084352ede6SMatthew G. Knepley   PetscInt          icntl, threads;
509d305a81bSVasiliy Kozyrev   PetscBool         flg;
510d305a81bSVasiliy Kozyrev 
511d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
5121d27aa22SBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MKL Cluster PARDISO Options", "Mat");
5131d27aa22SBarry Smith   PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_65", "Suggested number of threads to use within MKL Cluster PARDISO", "None", threads, &threads, &flg));
5144352ede6SMatthew G. Knepley   if (flg) mkl_set_num_threads((int)threads);
515d305a81bSVasiliy Kozyrev 
5169566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_66", "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time", "None", mat_mkl_cpardiso->maxfct, &icntl, &flg));
517d305a81bSVasiliy Kozyrev   if (flg) mat_mkl_cpardiso->maxfct = icntl;
518d305a81bSVasiliy Kozyrev 
5199566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_67", "Indicates the actual matrix for the solution phase", "None", mat_mkl_cpardiso->mnum, &icntl, &flg));
520d305a81bSVasiliy Kozyrev   if (flg) mat_mkl_cpardiso->mnum = icntl;
521d305a81bSVasiliy Kozyrev 
5229566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_68", "Message level information", "None", mat_mkl_cpardiso->msglvl, &icntl, &flg));
523d305a81bSVasiliy Kozyrev   if (flg) mat_mkl_cpardiso->msglvl = icntl;
524d305a81bSVasiliy Kozyrev 
5259566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_69", "Defines the matrix type", "None", mat_mkl_cpardiso->mtype, &icntl, &flg));
526560a203cSprj-   if (flg) mat_mkl_cpardiso->mtype = icntl;
5279566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_1", "Use default values", "None", mat_mkl_cpardiso->iparm[0], &icntl, &flg));
528d305a81bSVasiliy Kozyrev 
529d305a81bSVasiliy Kozyrev   if (flg && icntl != 0) {
5309566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_2", "Fill-in reducing ordering for the input matrix", "None", mat_mkl_cpardiso->iparm[1], &icntl, &flg));
531d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
532d305a81bSVasiliy Kozyrev 
5339566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_4", "Preconditioned CGS/CG", "None", mat_mkl_cpardiso->iparm[3], &icntl, &flg));
534d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
535d305a81bSVasiliy Kozyrev 
5369566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_5", "User permutation", "None", mat_mkl_cpardiso->iparm[4], &icntl, &flg));
537d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
538d305a81bSVasiliy Kozyrev 
5399566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_6", "Write solution on x", "None", mat_mkl_cpardiso->iparm[5], &icntl, &flg));
540d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
541d305a81bSVasiliy Kozyrev 
5429566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_8", "Iterative refinement step", "None", mat_mkl_cpardiso->iparm[7], &icntl, &flg));
543d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
544d305a81bSVasiliy Kozyrev 
5459566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_10", "Pivoting perturbation", "None", mat_mkl_cpardiso->iparm[9], &icntl, &flg));
546d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
547d305a81bSVasiliy Kozyrev 
5489566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_11", "Scaling vectors", "None", mat_mkl_cpardiso->iparm[10], &icntl, &flg));
549d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
550d305a81bSVasiliy Kozyrev 
5519566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_12", "Solve with transposed or conjugate transposed matrix A", "None", mat_mkl_cpardiso->iparm[11], &icntl, &flg));
552d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
553d305a81bSVasiliy Kozyrev 
554d0609cedSBarry Smith     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_13", "Improved accuracy using (non-) symmetric weighted matching", "None", mat_mkl_cpardiso->iparm[12], &icntl, &flg));
555d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
556d305a81bSVasiliy Kozyrev 
557d0609cedSBarry Smith     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_18", "Numbers of non-zero elements", "None", mat_mkl_cpardiso->iparm[17], &icntl, &flg));
558d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
559d305a81bSVasiliy Kozyrev 
5609566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_19", "Report number of floating point operations", "None", mat_mkl_cpardiso->iparm[18], &icntl, &flg));
561d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
562d305a81bSVasiliy Kozyrev 
5639566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_21", "Pivoting for symmetric indefinite matrices", "None", mat_mkl_cpardiso->iparm[20], &icntl, &flg));
564d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
565d305a81bSVasiliy Kozyrev 
5669566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_24", "Parallel factorization control", "None", mat_mkl_cpardiso->iparm[23], &icntl, &flg));
567d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
568d305a81bSVasiliy Kozyrev 
5699566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_25", "Parallel forward/backward solve control", "None", mat_mkl_cpardiso->iparm[24], &icntl, &flg));
570d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
571d305a81bSVasiliy Kozyrev 
5729566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_27", "Matrix checker", "None", mat_mkl_cpardiso->iparm[26], &icntl, &flg));
573d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
574d305a81bSVasiliy Kozyrev 
5759566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_31", "Partial solve and computing selected components of the solution vectors", "None", mat_mkl_cpardiso->iparm[30], &icntl, &flg));
576d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
577d305a81bSVasiliy Kozyrev 
5789566063dSJacob Faibussowitsch     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_34", "Optimal number of threads for conditional numerical reproducibility (CNR) mode", "None", mat_mkl_cpardiso->iparm[33], &icntl, &flg));
579d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
580d305a81bSVasiliy Kozyrev 
5811d27aa22SBarry Smith     PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_60", "Intel MKL Cluster PARDISO mode", "None", mat_mkl_cpardiso->iparm[59], &icntl, &flg));
582d305a81bSVasiliy Kozyrev     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
583d305a81bSVasiliy Kozyrev   }
584d305a81bSVasiliy Kozyrev 
585d0609cedSBarry Smith   PetscOptionsEnd();
5863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
587d305a81bSVasiliy Kozyrev }
588d305a81bSVasiliy Kozyrev 
58966976f2fSJacob Faibussowitsch static PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
590d71ae5a4SJacob Faibussowitsch {
59151d8ba46SPierre Jolivet   PetscInt    bs;
59251d8ba46SPierre Jolivet   PetscBool   match;
5932b26979fSBarry Smith   PetscMPIInt size;
594ae02b5cfSPierre Jolivet   MPI_Comm    comm;
595d305a81bSVasiliy Kozyrev 
596d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
5979566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)A), &comm));
5989566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
599ae02b5cfSPierre Jolivet   mat_mkl_cpardiso->comm_mkl_cpardiso = MPI_Comm_c2f(comm);
600d305a81bSVasiliy Kozyrev 
601d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
602d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->maxfct  = 1;
603d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->mnum    = 1;
604d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->n       = A->rmap->N;
60551d8ba46SPierre Jolivet   if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
606d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->msglvl = 0;
607d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->nrhs   = 1;
608d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->err    = 0;
609d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->phase  = -1;
610*fc2fb351SPierre Jolivet   mat_mkl_cpardiso->mtype  = PetscDefined(USE_COMPLEX) ? 13 : 11;
611d305a81bSVasiliy Kozyrev 
612*fc2fb351SPierre Jolivet   mat_mkl_cpardiso->iparm[27] = PetscDefined(USE_REAL_SINGLE) ? 1 : 0;
613d305a81bSVasiliy Kozyrev 
614da81f932SPierre Jolivet   mat_mkl_cpardiso->iparm[0]  = 1;  /* Solver default parameters overridden with provided by iparm */
615d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->iparm[1]  = 2;  /* Use METIS for fill-in reordering */
616d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->iparm[5]  = 0;  /* Write solution into x */
617d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->iparm[7]  = 2;  /* Max number of iterative refinement steps */
618d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->iparm[9]  = 13; /* Perturb the pivot elements with 1E-13 */
619d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->iparm[10] = 1;  /* Use nonsymmetric permutation and scaling MPS */
620d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->iparm[12] = 1;  /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
621d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
622d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
623d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->iparm[26] = 1;  /* Check input data for correctness */
624d305a81bSVasiliy Kozyrev 
625d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->iparm[39] = 0;
6262b26979fSBarry Smith   if (size > 1) {
627d305a81bSVasiliy Kozyrev     mat_mkl_cpardiso->iparm[39] = 2;
628d305a81bSVasiliy Kozyrev     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
629d305a81bSVasiliy Kozyrev     mat_mkl_cpardiso->iparm[41] = A->rmap->rend - 1;
630d305a81bSVasiliy Kozyrev   }
6319566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &match, MATMPIBAIJ, MATMPISBAIJ, ""));
63251d8ba46SPierre Jolivet   if (match) {
6339566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSize(A, &bs));
63451d8ba46SPierre Jolivet     mat_mkl_cpardiso->iparm[36] = bs;
63551d8ba46SPierre Jolivet     mat_mkl_cpardiso->iparm[40] /= bs;
63651d8ba46SPierre Jolivet     mat_mkl_cpardiso->iparm[41] /= bs;
63751d8ba46SPierre Jolivet     mat_mkl_cpardiso->iparm[40]++;
63851d8ba46SPierre Jolivet     mat_mkl_cpardiso->iparm[41]++;
63951d8ba46SPierre Jolivet     mat_mkl_cpardiso->iparm[34] = 0; /* Fortran style */
64051d8ba46SPierre Jolivet   } else {
64151d8ba46SPierre Jolivet     mat_mkl_cpardiso->iparm[34] = 1; /* C style */
64251d8ba46SPierre Jolivet   }
64351d8ba46SPierre Jolivet 
644d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->perm = 0;
6453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
646d305a81bSVasiliy Kozyrev }
647d305a81bSVasiliy Kozyrev 
648d305a81bSVasiliy Kozyrev /*
649d305a81bSVasiliy Kozyrev  * Symbolic decomposition. Mkl_Pardiso analysis phase.
650d305a81bSVasiliy Kozyrev  */
65166976f2fSJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
652d71ae5a4SJacob Faibussowitsch {
653660873c6SBarry Smith   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data;
654d305a81bSVasiliy Kozyrev 
655d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
656d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
657d305a81bSVasiliy Kozyrev 
658d305a81bSVasiliy Kozyrev   /* Set MKL_CPARDISO options from the options database */
65926cc229bSBarry Smith   PetscCall(MatSetFromOptions_MKL_CPARDISO(F, A));
6609566063dSJacob Faibussowitsch   PetscCall((*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_INITIAL_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a));
661d305a81bSVasiliy Kozyrev 
662d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->n = A->rmap->N;
66351d8ba46SPierre Jolivet   if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
664d305a81bSVasiliy Kozyrev 
665d305a81bSVasiliy Kozyrev   /* analysis phase */
666d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
667d305a81bSVasiliy Kozyrev 
668aaaa354bSBarry Smith   PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja,
669aaaa354bSBarry Smith                                          mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, NULL, NULL, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err));
6701d27aa22SBarry Smith   PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\".Check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err));
671d305a81bSVasiliy Kozyrev 
672d305a81bSVasiliy Kozyrev   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
673d305a81bSVasiliy Kozyrev   F->ops->lufactornumeric   = MatFactorNumeric_MKL_CPARDISO;
674d305a81bSVasiliy Kozyrev   F->ops->solve             = MatSolve_MKL_CPARDISO;
675bd5dbebeSNils Friess   F->ops->forwardsolve      = MatForwardSolve_MKL_CPARDISO;
676bd5dbebeSNils Friess   F->ops->backwardsolve     = MatBackwardSolve_MKL_CPARDISO;
677d305a81bSVasiliy Kozyrev   F->ops->solvetranspose    = MatSolveTranspose_MKL_CPARDISO;
678d305a81bSVasiliy Kozyrev   F->ops->matsolve          = MatMatSolve_MKL_CPARDISO;
6793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
680d305a81bSVasiliy Kozyrev }
681d305a81bSVasiliy Kozyrev 
68266976f2fSJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_CPARDISO(Mat F, Mat A, IS perm, const MatFactorInfo *info)
683d71ae5a4SJacob Faibussowitsch {
68451d8ba46SPierre Jolivet   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data;
68551d8ba46SPierre Jolivet 
68651d8ba46SPierre Jolivet   PetscFunctionBegin;
68751d8ba46SPierre Jolivet   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
68851d8ba46SPierre Jolivet 
68951d8ba46SPierre Jolivet   /* Set MKL_CPARDISO options from the options database */
69026cc229bSBarry Smith   PetscCall(MatSetFromOptions_MKL_CPARDISO(F, A));
6919566063dSJacob Faibussowitsch   PetscCall((*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_INITIAL_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a));
69251d8ba46SPierre Jolivet 
69351d8ba46SPierre Jolivet   mat_mkl_cpardiso->n = A->rmap->N;
69451d8ba46SPierre Jolivet   if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
6958b933ea1SPierre Jolivet   PetscCheck(!PetscDefined(USE_COMPLEX), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for PARDISO CHOLESKY with complex scalars! Use MAT_FACTOR_LU instead");
696b94d7dedSBarry Smith   if (A->spd == PETSC_BOOL3_TRUE) mat_mkl_cpardiso->mtype = 2;
69751d8ba46SPierre Jolivet   else mat_mkl_cpardiso->mtype = -2;
69851d8ba46SPierre Jolivet 
69951d8ba46SPierre Jolivet   /* analysis phase */
70051d8ba46SPierre Jolivet   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
70151d8ba46SPierre Jolivet 
702aaaa354bSBarry Smith   PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja,
703aaaa354bSBarry Smith                                          mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, NULL, NULL, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err));
7041d27aa22SBarry Smith   PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\".Check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err));
70551d8ba46SPierre Jolivet 
70651d8ba46SPierre Jolivet   mat_mkl_cpardiso->CleanUp     = PETSC_TRUE;
70751d8ba46SPierre Jolivet   F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_CPARDISO;
70851d8ba46SPierre Jolivet   F->ops->solve                 = MatSolve_MKL_CPARDISO;
70951d8ba46SPierre Jolivet   F->ops->solvetranspose        = MatSolveTranspose_MKL_CPARDISO;
71051d8ba46SPierre Jolivet   F->ops->matsolve              = MatMatSolve_MKL_CPARDISO;
711bd5dbebeSNils Friess   if (A->spd == PETSC_BOOL3_TRUE) {
712bd5dbebeSNils Friess     F->ops->forwardsolve  = MatForwardSolve_MKL_CPARDISO;
713bd5dbebeSNils Friess     F->ops->backwardsolve = MatBackwardSolve_MKL_CPARDISO;
714bd5dbebeSNils Friess   }
7153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
71651d8ba46SPierre Jolivet }
71751d8ba46SPierre Jolivet 
71866976f2fSJacob Faibussowitsch static PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
719d71ae5a4SJacob Faibussowitsch {
7209f196a02SMartin Diehl   PetscBool         isascii;
721d305a81bSVasiliy Kozyrev   PetscViewerFormat format;
722660873c6SBarry Smith   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data;
723d305a81bSVasiliy Kozyrev   PetscInt          i;
724d305a81bSVasiliy Kozyrev 
725d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
726d305a81bSVasiliy Kozyrev   /* check if matrix is mkl_cpardiso type */
7273ba16761SJacob Faibussowitsch   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(PETSC_SUCCESS);
728d305a81bSVasiliy Kozyrev 
7299f196a02SMartin Diehl   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
7309f196a02SMartin Diehl   if (isascii) {
7319566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
732d305a81bSVasiliy Kozyrev     if (format == PETSC_VIEWER_ASCII_INFO) {
7331d27aa22SBarry Smith       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO run parameters:\n"));
7341d27aa22SBarry Smith       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO phase:             %d \n", mat_mkl_cpardiso->phase));
7351d27aa22SBarry Smith       for (i = 1; i <= 64; i++) PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO iparm[%d]:     %d \n", i, mat_mkl_cpardiso->iparm[i - 1]));
7361d27aa22SBarry Smith       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct));
7371d27aa22SBarry Smith       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum));
7381d27aa22SBarry Smith       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype));
7391d27aa22SBarry Smith       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO n:     %d \n", mat_mkl_cpardiso->n));
7401d27aa22SBarry Smith       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs));
7411d27aa22SBarry Smith       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl));
742d305a81bSVasiliy Kozyrev     }
743d305a81bSVasiliy Kozyrev   }
7443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
745d305a81bSVasiliy Kozyrev }
746d305a81bSVasiliy Kozyrev 
74766976f2fSJacob Faibussowitsch static PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
748d71ae5a4SJacob Faibussowitsch {
749660873c6SBarry Smith   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data;
750d305a81bSVasiliy Kozyrev 
751d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
752d305a81bSVasiliy Kozyrev   info->block_size        = 1.0;
753d305a81bSVasiliy Kozyrev   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
754d305a81bSVasiliy Kozyrev   info->nz_unneeded       = 0.0;
755d305a81bSVasiliy Kozyrev   info->assemblies        = 0.0;
756d305a81bSVasiliy Kozyrev   info->mallocs           = 0.0;
757d305a81bSVasiliy Kozyrev   info->memory            = 0.0;
758d305a81bSVasiliy Kozyrev   info->fill_ratio_given  = 0;
759d305a81bSVasiliy Kozyrev   info->fill_ratio_needed = 0;
760d305a81bSVasiliy Kozyrev   info->factor_mallocs    = 0;
7613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
762d305a81bSVasiliy Kozyrev }
763d305a81bSVasiliy Kozyrev 
76466976f2fSJacob Faibussowitsch static PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F, PetscInt icntl, PetscInt ival)
765d71ae5a4SJacob Faibussowitsch {
766660873c6SBarry Smith   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data;
7672b26979fSBarry Smith 
768d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
769d305a81bSVasiliy Kozyrev   if (icntl <= 64) {
770d305a81bSVasiliy Kozyrev     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
771d305a81bSVasiliy Kozyrev   } else {
772660873c6SBarry Smith     if (icntl == 65) mkl_set_num_threads((int)ival);
773660873c6SBarry Smith     else if (icntl == 66) mat_mkl_cpardiso->maxfct = ival;
774660873c6SBarry Smith     else if (icntl == 67) mat_mkl_cpardiso->mnum = ival;
775660873c6SBarry Smith     else if (icntl == 68) mat_mkl_cpardiso->msglvl = ival;
776560a203cSprj-     else if (icntl == 69) mat_mkl_cpardiso->mtype = ival;
777d305a81bSVasiliy Kozyrev   }
7783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
779d305a81bSVasiliy Kozyrev }
780d305a81bSVasiliy Kozyrev 
781d305a81bSVasiliy Kozyrev /*@
7821d27aa22SBarry Smith   MatMkl_CPardisoSetCntl - Set MKL Cluster PARDISO parameters
7831d27aa22SBarry Smith   <https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2023-2/onemkl-pardiso-parallel-direct-sparse-solver-iface.html>
784d305a81bSVasiliy Kozyrev 
785c3339decSBarry Smith   Logically Collective
786d305a81bSVasiliy Kozyrev 
787d305a81bSVasiliy Kozyrev   Input Parameters:
78811a5261eSBarry Smith + F     - the factored matrix obtained by calling `MatGetFactor()`
7891d27aa22SBarry Smith . icntl - index of MKL Cluster PARDISO parameter
7901d27aa22SBarry Smith - ival  - value of MKL Cluster PARDISO parameter
791d305a81bSVasiliy Kozyrev 
7923c7db156SBarry Smith   Options Database Key:
793147403d9SBarry Smith . -mat_mkl_cpardiso_<icntl> <ival> - set the option numbered icntl to ival
794d305a81bSVasiliy Kozyrev 
795fe59aa6dSJacob Faibussowitsch   Level: intermediate
7967ca43916SBarry Smith 
79711a5261eSBarry Smith   Note:
79811a5261eSBarry Smith   This routine cannot be used if you are solving the linear system with `TS`, `SNES`, or `KSP`, only if you directly call `MatGetFactor()` so use the options
7992ef1f0ffSBarry Smith   database approach when working with `TS`, `SNES`, or `KSP`. See `MATSOLVERMKL_CPARDISO` for the options
800d305a81bSVasiliy Kozyrev 
8011cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MATMPIAIJ`, `MATSOLVERMKL_CPARDISO`
802d305a81bSVasiliy Kozyrev @*/
803d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMkl_CPardisoSetCntl(Mat F, PetscInt icntl, PetscInt ival)
804d71ae5a4SJacob Faibussowitsch {
805d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
806cac4c232SBarry Smith   PetscTryMethod(F, "MatMkl_CPardisoSetCntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
8073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
808d305a81bSVasiliy Kozyrev }
809d305a81bSVasiliy Kozyrev 
8109b4359b8SBarry Smith /*MC
8111d27aa22SBarry Smith   MATSOLVERMKL_CPARDISO -  A matrix type providing direct solvers (LU) for parallel matrices via the external package MKL Cluster PARDISO
8121d27aa22SBarry Smith   <https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2023-2/onemkl-pardiso-parallel-direct-sparse-solver-iface.html>
8139b4359b8SBarry Smith 
81411a5261eSBarry Smith   Works with `MATMPIAIJ` matrices
8159b4359b8SBarry Smith 
8162ef1f0ffSBarry Smith   Use `-pc_type lu` `-pc_factor_mat_solver_type mkl_cpardiso` to use this direct solver
8179b4359b8SBarry Smith 
8189b4359b8SBarry Smith   Options Database Keys:
8191d27aa22SBarry Smith + -mat_mkl_cpardiso_65 - Suggested number of threads to use within MKL Cluster PARDISO
8209b4359b8SBarry Smith . -mat_mkl_cpardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time
8219b4359b8SBarry Smith . -mat_mkl_cpardiso_67 - Indicates the actual matrix for the solution phase
8229b4359b8SBarry Smith . -mat_mkl_cpardiso_68 - Message level information, use 1 to get detailed information on the solver options
8239b4359b8SBarry Smith . -mat_mkl_cpardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type
8249b4359b8SBarry Smith . -mat_mkl_cpardiso_1  - Use default values
8259b4359b8SBarry Smith . -mat_mkl_cpardiso_2  - Fill-in reducing ordering for the input matrix
8269b4359b8SBarry Smith . -mat_mkl_cpardiso_4  - Preconditioned CGS/CG
8279b4359b8SBarry Smith . -mat_mkl_cpardiso_5  - User permutation
8289b4359b8SBarry Smith . -mat_mkl_cpardiso_6  - Write solution on x
8299b4359b8SBarry Smith . -mat_mkl_cpardiso_8  - Iterative refinement step
8309b4359b8SBarry Smith . -mat_mkl_cpardiso_10 - Pivoting perturbation
8319b4359b8SBarry Smith . -mat_mkl_cpardiso_11 - Scaling vectors
8329b4359b8SBarry Smith . -mat_mkl_cpardiso_12 - Solve with transposed or conjugate transposed matrix A
8339b4359b8SBarry Smith . -mat_mkl_cpardiso_13 - Improved accuracy using (non-) symmetric weighted matching
8349b4359b8SBarry Smith . -mat_mkl_cpardiso_18 - Numbers of non-zero elements
8359b4359b8SBarry Smith . -mat_mkl_cpardiso_19 - Report number of floating point operations
8369b4359b8SBarry Smith . -mat_mkl_cpardiso_21 - Pivoting for symmetric indefinite matrices
8379b4359b8SBarry Smith . -mat_mkl_cpardiso_24 - Parallel factorization control
8389b4359b8SBarry Smith . -mat_mkl_cpardiso_25 - Parallel forward/backward solve control
8399b4359b8SBarry Smith . -mat_mkl_cpardiso_27 - Matrix checker
8409b4359b8SBarry Smith . -mat_mkl_cpardiso_31 - Partial solve and computing selected components of the solution vectors
8419b4359b8SBarry Smith . -mat_mkl_cpardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode
8421d27aa22SBarry Smith - -mat_mkl_cpardiso_60 - Intel MKL Cluster PARDISO mode
8439b4359b8SBarry Smith 
8449b4359b8SBarry Smith   Level: beginner
8459b4359b8SBarry Smith 
8469b4359b8SBarry Smith   Notes:
8472ef1f0ffSBarry Smith   Use `-mat_mkl_cpardiso_68 1` to display the number of threads the solver is using. MKL does not provide a way to directly access this
8489b4359b8SBarry Smith   information.
8499b4359b8SBarry Smith 
8501d27aa22SBarry Smith   For more information on the options check
8511d27aa22SBarry Smith   <https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2023-2/onemkl-pardiso-parallel-direct-sparse-solver-iface.html>
8529b4359b8SBarry Smith 
8531d27aa22SBarry Smith .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMkl_CPardisoSetCntl()`, `MatGetFactor()`, `MATSOLVERMKL_PARDISO`
8549b4359b8SBarry Smith M*/
8559b4359b8SBarry Smith 
856d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type)
857d71ae5a4SJacob Faibussowitsch {
858d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
859d305a81bSVasiliy Kozyrev   *type = MATSOLVERMKL_CPARDISO;
8603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
861d305a81bSVasiliy Kozyrev }
862d305a81bSVasiliy Kozyrev 
863d305a81bSVasiliy Kozyrev /* MatGetFactor for MPI AIJ matrices */
864d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A, MatFactorType ftype, Mat *F)
865d71ae5a4SJacob Faibussowitsch {
866d305a81bSVasiliy Kozyrev   Mat               B;
867d305a81bSVasiliy Kozyrev   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
86851d8ba46SPierre Jolivet   PetscBool         isSeqAIJ, isMPIBAIJ, isMPISBAIJ;
869d305a81bSVasiliy Kozyrev 
870d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
871d305a81bSVasiliy Kozyrev   /* Create the factorization matrix */
872d305a81bSVasiliy Kozyrev 
8739566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
8749566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &isMPIBAIJ));
8759566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISBAIJ, &isMPISBAIJ));
8769566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
8779566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
8789566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy("mkl_cpardiso", &((PetscObject)B)->type_name));
8799566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
880d305a81bSVasiliy Kozyrev 
8814dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&mat_mkl_cpardiso));
882d305a81bSVasiliy Kozyrev 
883660873c6SBarry Smith   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
88451d8ba46SPierre Jolivet   else if (isMPIBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO;
88551d8ba46SPierre Jolivet   else if (isMPISBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO;
886660873c6SBarry Smith   else mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
887d305a81bSVasiliy Kozyrev 
88851d8ba46SPierre Jolivet   if (ftype == MAT_FACTOR_LU) B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
88951d8ba46SPierre Jolivet   else B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_CPARDISO;
890d305a81bSVasiliy Kozyrev   B->ops->destroy = MatDestroy_MKL_CPARDISO;
891d305a81bSVasiliy Kozyrev 
892d305a81bSVasiliy Kozyrev   B->ops->view    = MatView_MKL_CPARDISO;
893d305a81bSVasiliy Kozyrev   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
894d305a81bSVasiliy Kozyrev 
895d305a81bSVasiliy Kozyrev   B->factortype = ftype;
896d305a81bSVasiliy Kozyrev   B->assembled  = PETSC_TRUE; /* required by -ksp_view */
897d305a81bSVasiliy Kozyrev 
898660873c6SBarry Smith   B->data = mat_mkl_cpardiso;
899d305a81bSVasiliy Kozyrev 
90000c67f3bSHong Zhang   /* set solvertype */
9019566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
9029566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERMKL_CPARDISO, &B->solvertype));
90300c67f3bSHong Zhang 
9049566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mkl_cpardiso));
9059566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMkl_CPardisoSetCntl_C", MatMkl_CPardisoSetCntl_MKL_CPARDISO));
9069566063dSJacob Faibussowitsch   PetscCall(PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso));
907d305a81bSVasiliy Kozyrev 
908d305a81bSVasiliy Kozyrev   *F = B;
9093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
910d305a81bSVasiliy Kozyrev }
911d305a81bSVasiliy Kozyrev 
912d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void)
913d71ae5a4SJacob Faibussowitsch {
914d305a81bSVasiliy Kozyrev   PetscFunctionBegin;
9159566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMKL_CPARDISO, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_mpiaij_mkl_cpardiso));
9169566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMKL_CPARDISO, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_mpiaij_mkl_cpardiso));
9179566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMKL_CPARDISO, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_mpiaij_mkl_cpardiso));
9189566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMKL_CPARDISO, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_mpiaij_mkl_cpardiso));
9193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
920d305a81bSVasiliy Kozyrev }
921