xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision 2475b7ca256cea2a4b7cbf2d8babcda14e5fa36e)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>                       /*I "petscmat.h" I*/
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>
4 
5 #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
6 #define MKL_ILP64
7 #endif
8 #include <mkl.h>
9 #include <mkl_cluster_sparse_solver.h>
10 
11 /*
12  *  Possible mkl_cpardiso phases that controls the execution of the solver.
13  *  For more information check mkl_cpardiso manual.
14  */
15 #define JOB_ANALYSIS 11
16 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
17 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
18 #define JOB_NUMERICAL_FACTORIZATION 22
19 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
20 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
21 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
22 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
23 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
24 #define JOB_RELEASE_OF_LU_MEMORY 0
25 #define JOB_RELEASE_OF_ALL_MEMORY -1
26 
27 #define IPARM_SIZE 64
28 #define INT_TYPE MKL_INT
29 
30 static const char *Err_MSG_CPardiso(int errNo){
31   switch (errNo) {
32     case -1:
33       return "input inconsistent"; break;
34     case -2:
35       return "not enough memory"; break;
36     case -3:
37       return "reordering problem"; break;
38     case -4:
39       return "zero pivot, numerical factorization or iterative refinement problem"; break;
40     case -5:
41       return "unclassified (internal) error"; break;
42     case -6:
43       return "preordering failed (matrix types 11, 13 only)"; break;
44     case -7:
45       return "diagonal matrix problem"; break;
46     case -8:
47       return "32-bit integer overflow problem"; break;
48     case -9:
49       return "not enough memory for OOC"; break;
50     case -10:
51       return "problems with opening OOC temporary files"; break;
52     case -11:
53       return "read/write problems with the OOC data file"; break;
54     default :
55       return "unknown error";
56   }
57 }
58 
59 /*
60  *  Internal data structure.
61  *  For more information check mkl_cpardiso manual.
62  */
63 
64 typedef struct {
65 
66   /* Configuration vector */
67   INT_TYPE     iparm[IPARM_SIZE];
68 
69   /*
70    * Internal mkl_cpardiso memory location.
71    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
72    */
73   void         *pt[IPARM_SIZE];
74 
75   MPI_Comm     comm_mkl_cpardiso;
76 
77   /* Basic mkl_cpardiso info*/
78   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
79 
80   /* Matrix structure */
81   PetscScalar  *a;
82 
83   INT_TYPE     *ia, *ja;
84 
85   /* Number of non-zero elements */
86   INT_TYPE     nz;
87 
88   /* Row permutaton vector*/
89   INT_TYPE     *perm;
90 
91   /* Define is matrix preserve sparce structure. */
92   MatStructure matstruc;
93 
94   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
95 
96   /* True if mkl_cpardiso function have been used. */
97   PetscBool CleanUp;
98 } Mat_MKL_CPARDISO;
99 
100 /*
101  * Copy the elements of matrix A.
102  * Input:
103  *   - Mat A: MATSEQAIJ matrix
104  *   - int shift: matrix index.
105  *     - 0 for c representation
106  *     - 1 for fortran representation
107  *   - MatReuse reuse:
108  *     - MAT_INITIAL_MATRIX: Create a new aij representation
109  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
110  * Output:
111  *   - int *nnz: Number of nonzero-elements.
112  *   - int **r pointer to i index
113  *   - int **c pointer to j elements
114  *   - MATRIXTYPE **v: Non-zero elements
115  */
116 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
117 {
118   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
119 
120   PetscFunctionBegin;
121   *v=aa->a;
122   if (reuse == MAT_INITIAL_MATRIX) {
123     *r   = (INT_TYPE*)aa->i;
124     *c   = (INT_TYPE*)aa->j;
125     *nnz = aa->nz;
126   }
127   PetscFunctionReturn(0);
128 }
129 
130 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
131 {
132   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
133   PetscErrorCode    ierr;
134   PetscInt          rstart,nz,i,j,countA,countB;
135   PetscInt          *row,*col;
136   const PetscScalar *av, *bv;
137   PetscScalar       *val;
138   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
139   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
140   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
141   PetscInt          colA_start,jB,jcol;
142 
143   PetscFunctionBegin;
144   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
145   av=aa->a; bv=bb->a;
146 
147   garray = mat->garray;
148 
149   if (reuse == MAT_INITIAL_MATRIX) {
150     nz   = aa->nz + bb->nz;
151     *nnz = nz;
152     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
153     col  = row + m + 1;
154     val  = (PetscScalar*)(col + nz);
155     *r = row; *c = col; *v = val;
156     row[0] = 0;
157   } else {
158     row = *r; col = *c; val = *v;
159   }
160 
161   nz = 0;
162   for (i=0; i<m; i++) {
163     row[i] = nz;
164     countA     = ai[i+1] - ai[i];
165     countB     = bi[i+1] - bi[i];
166     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
167     bjj        = bj + bi[i];
168 
169     /* B part, smaller col index */
170     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
171     jB         = 0;
172     for (j=0; j<countB; j++) {
173       jcol = garray[bjj[j]];
174       if (jcol > colA_start) {
175         jB = j;
176         break;
177       }
178       col[nz]   = jcol;
179       val[nz++] = *bv++;
180       if (j==countB-1) jB = countB;
181     }
182 
183     /* A part */
184     for (j=0; j<countA; j++) {
185       col[nz]   = rstart + ajj[j];
186       val[nz++] = *av++;
187     }
188 
189     /* B part, larger col index */
190     for (j=jB; j<countB; j++) {
191       col[nz]   = garray[bjj[j]];
192       val[nz++] = *bv++;
193     }
194   }
195   row[m] = nz;
196 
197   PetscFunctionReturn(0);
198 }
199 
200 /*
201  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
202  */
203 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
204 {
205   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
206   PetscErrorCode   ierr;
207 
208   PetscFunctionBegin;
209   /* Terminate instance, deallocate memories */
210   if (mat_mkl_cpardiso->CleanUp) {
211     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
212 
213     cluster_sparse_solver (
214       mat_mkl_cpardiso->pt,
215       &mat_mkl_cpardiso->maxfct,
216       &mat_mkl_cpardiso->mnum,
217       &mat_mkl_cpardiso->mtype,
218       &mat_mkl_cpardiso->phase,
219       &mat_mkl_cpardiso->n,
220       NULL,
221       NULL,
222       NULL,
223       mat_mkl_cpardiso->perm,
224       &mat_mkl_cpardiso->nrhs,
225       mat_mkl_cpardiso->iparm,
226       &mat_mkl_cpardiso->msglvl,
227       NULL,
228       NULL,
229       &mat_mkl_cpardiso->comm_mkl_cpardiso,
230       (PetscInt*)&mat_mkl_cpardiso->err);
231   }
232 
233   if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) {
234     ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr);
235   }
236   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
237   ierr = PetscFree(A->data);CHKERRQ(ierr);
238 
239   /* clear composed functions */
240   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
241   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
242   PetscFunctionReturn(0);
243 }
244 
245 /*
246  * Computes Ax = b
247  */
248 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
249 {
250   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
251   PetscErrorCode    ierr;
252   PetscScalar       *xarray;
253   const PetscScalar *barray;
254 
255   PetscFunctionBegin;
256   mat_mkl_cpardiso->nrhs = 1;
257   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
258   ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
259 
260   /* solve phase */
261   /*-------------*/
262   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
263   cluster_sparse_solver (
264     mat_mkl_cpardiso->pt,
265     &mat_mkl_cpardiso->maxfct,
266     &mat_mkl_cpardiso->mnum,
267     &mat_mkl_cpardiso->mtype,
268     &mat_mkl_cpardiso->phase,
269     &mat_mkl_cpardiso->n,
270     mat_mkl_cpardiso->a,
271     mat_mkl_cpardiso->ia,
272     mat_mkl_cpardiso->ja,
273     mat_mkl_cpardiso->perm,
274     &mat_mkl_cpardiso->nrhs,
275     mat_mkl_cpardiso->iparm,
276     &mat_mkl_cpardiso->msglvl,
277     (void*)barray,
278     (void*)xarray,
279     &mat_mkl_cpardiso->comm_mkl_cpardiso,
280     (PetscInt*)&mat_mkl_cpardiso->err);
281 
282   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
283 
284   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
285   ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
286   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
287   PetscFunctionReturn(0);
288 }
289 
290 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
291 {
292   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
293   PetscErrorCode   ierr;
294 
295   PetscFunctionBegin;
296 #if defined(PETSC_USE_COMPLEX)
297   mat_mkl_cpardiso->iparm[12 - 1] = 1;
298 #else
299   mat_mkl_cpardiso->iparm[12 - 1] = 2;
300 #endif
301   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
302   mat_mkl_cpardiso->iparm[12 - 1] = 0;
303   PetscFunctionReturn(0);
304 }
305 
306 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
307 {
308   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
309   PetscErrorCode    ierr;
310   PetscScalar       *xarray;
311   const PetscScalar *barray;
312 
313   PetscFunctionBegin;
314   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
315 
316   if(mat_mkl_cpardiso->nrhs > 0){
317     ierr = MatDenseGetArrayRead(B,&barray);
318     ierr = MatDenseGetArray(X,&xarray);
319 
320     /* solve phase */
321     /*-------------*/
322     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
323     cluster_sparse_solver (
324       mat_mkl_cpardiso->pt,
325       &mat_mkl_cpardiso->maxfct,
326       &mat_mkl_cpardiso->mnum,
327       &mat_mkl_cpardiso->mtype,
328       &mat_mkl_cpardiso->phase,
329       &mat_mkl_cpardiso->n,
330       mat_mkl_cpardiso->a,
331       mat_mkl_cpardiso->ia,
332       mat_mkl_cpardiso->ja,
333       mat_mkl_cpardiso->perm,
334       &mat_mkl_cpardiso->nrhs,
335       mat_mkl_cpardiso->iparm,
336       &mat_mkl_cpardiso->msglvl,
337       (void*)barray,
338       (void*)xarray,
339       &mat_mkl_cpardiso->comm_mkl_cpardiso,
340       (PetscInt*)&mat_mkl_cpardiso->err);
341     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
342     ierr = MatDenseRestoreArrayRead(B,&barray);
343     ierr = MatDenseRestoreArray(X,&xarray);
344 
345   }
346   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
347   PetscFunctionReturn(0);
348 
349 }
350 
351 /*
352  * LU Decomposition
353  */
354 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
355 {
356   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
357   PetscErrorCode   ierr;
358 
359   PetscFunctionBegin;
360   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
361   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
362 
363   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
364   cluster_sparse_solver (
365     mat_mkl_cpardiso->pt,
366     &mat_mkl_cpardiso->maxfct,
367     &mat_mkl_cpardiso->mnum,
368     &mat_mkl_cpardiso->mtype,
369     &mat_mkl_cpardiso->phase,
370     &mat_mkl_cpardiso->n,
371     mat_mkl_cpardiso->a,
372     mat_mkl_cpardiso->ia,
373     mat_mkl_cpardiso->ja,
374     mat_mkl_cpardiso->perm,
375     &mat_mkl_cpardiso->nrhs,
376     mat_mkl_cpardiso->iparm,
377     &mat_mkl_cpardiso->msglvl,
378     NULL,
379     NULL,
380     &mat_mkl_cpardiso->comm_mkl_cpardiso,
381     &mat_mkl_cpardiso->err);
382   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
383 
384   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
385   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
386   PetscFunctionReturn(0);
387 }
388 
389 /* Sets mkl_cpardiso options from the options database */
390 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
391 {
392   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
393   PetscErrorCode      ierr;
394   PetscInt            icntl,threads;
395   PetscBool           flg;
396 
397   PetscFunctionBegin;
398   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
399   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr);
400   if (flg) mkl_set_num_threads((int)threads);
401 
402   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr);
403   if (flg) mat_mkl_cpardiso->maxfct = icntl;
404 
405   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr);
406   if (flg) mat_mkl_cpardiso->mnum = icntl;
407 
408   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr);
409   if (flg) mat_mkl_cpardiso->msglvl = icntl;
410 
411   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr);
412   if(flg){
413     mat_mkl_cpardiso->mtype = icntl;
414 #if defined(PETSC_USE_REAL_SINGLE)
415     mat_mkl_cpardiso->iparm[27] = 1;
416 #else
417     mat_mkl_cpardiso->iparm[27] = 0;
418 #endif
419     mat_mkl_cpardiso->iparm[34] = 1;
420   }
421   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr);
422 
423   if(flg && icntl != 0){
424     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr);
425     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
426 
427     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr);
428     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
429 
430     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr);
431     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
432 
433     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr);
434     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
435 
436     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr);
437     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
438 
439     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr);
440     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
441 
442     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr);
443     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
444 
445     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr);
446     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
447 
448     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
449       &flg);CHKERRQ(ierr);
450     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
451 
452     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
453       &flg);CHKERRQ(ierr);
454     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
455 
456     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr);
457     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
458 
459     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr);
460     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
461 
462     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr);
463     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
464 
465     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr);
466     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
467 
468     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr);
469     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
470 
471     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr);
472     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
473 
474     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr);
475     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
476 
477     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr);
478     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
479   }
480 
481   ierr = PetscOptionsEnd();CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484 
485 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
486 {
487   PetscErrorCode  ierr;
488   PetscMPIInt     size;
489 
490   PetscFunctionBegin;
491 
492   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
493   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
494 
495   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
496   mat_mkl_cpardiso->maxfct = 1;
497   mat_mkl_cpardiso->mnum = 1;
498   mat_mkl_cpardiso->n = A->rmap->N;
499   mat_mkl_cpardiso->msglvl = 0;
500   mat_mkl_cpardiso->nrhs = 1;
501   mat_mkl_cpardiso->err = 0;
502   mat_mkl_cpardiso->phase = -1;
503 #if defined(PETSC_USE_COMPLEX)
504   mat_mkl_cpardiso->mtype = 13;
505 #else
506   mat_mkl_cpardiso->mtype = 11;
507 #endif
508 
509 #if defined(PETSC_USE_REAL_SINGLE)
510   mat_mkl_cpardiso->iparm[27] = 1;
511 #else
512   mat_mkl_cpardiso->iparm[27] = 0;
513 #endif
514 
515   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
516 
517   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
518   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
519   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
520   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
521   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
522   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
523   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
524   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
525   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
526   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
527 
528   mat_mkl_cpardiso->iparm[39] = 0;
529   if (size > 1) {
530     mat_mkl_cpardiso->iparm[39] = 2;
531     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
532     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
533   }
534   mat_mkl_cpardiso->perm = 0;
535   PetscFunctionReturn(0);
536 }
537 
538 /*
539  * Symbolic decomposition. Mkl_Pardiso analysis phase.
540  */
541 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
542 {
543   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
544   PetscErrorCode  ierr;
545 
546   PetscFunctionBegin;
547   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
548 
549   /* Set MKL_CPARDISO options from the options database */
550   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
551   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
552 
553   mat_mkl_cpardiso->n = A->rmap->N;
554 
555   /* analysis phase */
556   /*----------------*/
557   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
558 
559   cluster_sparse_solver (
560     mat_mkl_cpardiso->pt,
561     &mat_mkl_cpardiso->maxfct,
562     &mat_mkl_cpardiso->mnum,
563     &mat_mkl_cpardiso->mtype,
564     &mat_mkl_cpardiso->phase,
565     &mat_mkl_cpardiso->n,
566     mat_mkl_cpardiso->a,
567     mat_mkl_cpardiso->ia,
568     mat_mkl_cpardiso->ja,
569     mat_mkl_cpardiso->perm,
570     &mat_mkl_cpardiso->nrhs,
571     mat_mkl_cpardiso->iparm,
572     &mat_mkl_cpardiso->msglvl,
573     NULL,
574     NULL,
575     &mat_mkl_cpardiso->comm_mkl_cpardiso,
576     (PetscInt*)&mat_mkl_cpardiso->err);
577 
578   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
579 
580   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
581   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
582   F->ops->solve           = MatSolve_MKL_CPARDISO;
583   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
584   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
585   PetscFunctionReturn(0);
586 }
587 
588 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
589 {
590   PetscErrorCode    ierr;
591   PetscBool         iascii;
592   PetscViewerFormat format;
593   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
594   PetscInt          i;
595 
596   PetscFunctionBegin;
597   /* check if matrix is mkl_cpardiso type */
598   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
599 
600   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
601   if (iascii) {
602     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
603     if (format == PETSC_VIEWER_ASCII_INFO) {
604       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
605       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
606       for(i = 1; i <= 64; i++){
607         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
608       }
609       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
610       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
611       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
612       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
613       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
614       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
615     }
616   }
617   PetscFunctionReturn(0);
618 }
619 
620 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
621 {
622   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data;
623 
624   PetscFunctionBegin;
625   info->block_size        = 1.0;
626   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
627   info->nz_unneeded       = 0.0;
628   info->assemblies        = 0.0;
629   info->mallocs           = 0.0;
630   info->memory            = 0.0;
631   info->fill_ratio_given  = 0;
632   info->fill_ratio_needed = 0;
633   info->factor_mallocs    = 0;
634   PetscFunctionReturn(0);
635 }
636 
637 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
638 {
639   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data;
640 
641   PetscFunctionBegin;
642   if(icntl <= 64){
643     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
644   } else {
645     if(icntl == 65) mkl_set_num_threads((int)ival);
646     else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival;
647     else if(icntl == 67) mat_mkl_cpardiso->mnum = ival;
648     else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival;
649     else if(icntl == 69){
650       mat_mkl_cpardiso->mtype = ival;
651 #if defined(PETSC_USE_REAL_SINGLE)
652       mat_mkl_cpardiso->iparm[27] = 1;
653 #else
654       mat_mkl_cpardiso->iparm[27] = 0;
655 #endif
656       mat_mkl_cpardiso->iparm[34] = 1;
657     }
658   }
659   PetscFunctionReturn(0);
660 }
661 
662 /*@
663   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
664 
665    Logically Collective on Mat
666 
667    Input Parameters:
668 +  F - the factored matrix obtained by calling MatGetFactor()
669 .  icntl - index of Mkl_Pardiso parameter
670 -  ival - value of Mkl_Pardiso parameter
671 
672   Options Database:
673 .   -mat_mkl_cpardiso_<icntl> <ival>
674 
675    Level: Intermediate
676 
677    Notes:
678     This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options
679           database approach when working with TS, SNES, or KSP.
680 
681    References:
682 .      Mkl_Pardiso Users' Guide
683 
684 .seealso: MatGetFactor()
685 @*/
686 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
687 {
688   PetscErrorCode ierr;
689 
690   PetscFunctionBegin;
691   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type)
696 {
697   PetscFunctionBegin;
698   *type = MATSOLVERMKL_CPARDISO;
699   PetscFunctionReturn(0);
700 }
701 
702 /* MatGetFactor for MPI AIJ matrices */
703 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
704 {
705   Mat              B;
706   PetscErrorCode   ierr;
707   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
708   PetscBool        isSeqAIJ;
709 
710   PetscFunctionBegin;
711   /* Create the factorization matrix */
712 
713   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
714   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
715   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
716   ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr);
717   ierr = MatSetUp(B);CHKERRQ(ierr);
718 
719   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
720 
721   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
722   else          mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
723 
724   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
725   B->ops->destroy = MatDestroy_MKL_CPARDISO;
726 
727   B->ops->view    = MatView_MKL_CPARDISO;
728   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
729 
730   B->factortype   = ftype;
731   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
732 
733   B->data = mat_mkl_cpardiso;
734 
735   /* set solvertype */
736   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
737   ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr);
738 
739   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);CHKERRQ(ierr);
740   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
741   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
742 
743   *F = B;
744   PetscFunctionReturn(0);
745 }
746 
747 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void)
748 {
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
753   ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756