xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision 7ef47afce42d156f92fdb40b831aaef23c0c8efe)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>                       /*I "petscmat.h" I*/
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #include <mkl.h>
12 #include <mkl_cluster_sparse_solver.h>
13 
14 /*
15  *  Possible mkl_cpardiso phases that controls the execution of the solver.
16  *  For more information check mkl_cpardiso manual.
17  */
18 #define JOB_ANALYSIS 11
19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
21 #define JOB_NUMERICAL_FACTORIZATION 22
22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
27 #define JOB_RELEASE_OF_LU_MEMORY 0
28 #define JOB_RELEASE_OF_ALL_MEMORY -1
29 
30 #define IPARM_SIZE 64
31 #define INT_TYPE MKL_INT
32 
33 static const char *Err_MSG_CPardiso(int errNo){
34   switch (errNo) {
35     case -1:
36       return "input inconsistent"; break;
37     case -2:
38       return "not enough memory"; break;
39     case -3:
40       return "reordering problem"; break;
41     case -4:
42       return "zero pivot, numerical factorization or iterative refinement problem"; break;
43     case -5:
44       return "unclassified (internal) error"; break;
45     case -6:
46       return "preordering failed (matrix types 11, 13 only)"; break;
47     case -7:
48       return "diagonal matrix problem"; break;
49     case -8:
50       return "32-bit integer overflow problem"; break;
51     case -9:
52       return "not enough memory for OOC"; break;
53     case -10:
54       return "problems with opening OOC temporary files"; break;
55     case -11:
56       return "read/write problems with the OOC data file"; break;
57     default :
58       return "unknown error";
59   }
60 }
61 
62 /*
63  *  Internal data structure.
64  *  For more information check mkl_cpardiso manual.
65  */
66 
67 typedef struct {
68 
69   /* Configuration vector */
70   INT_TYPE     iparm[IPARM_SIZE];
71 
72   /*
73    * Internal mkl_cpardiso memory location.
74    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
75    */
76   void         *pt[IPARM_SIZE];
77 
78   MPI_Comm     comm_mkl_cpardiso;
79 
80   /* Basic mkl_cpardiso info*/
81   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
82 
83   /* Matrix structure */
84   PetscScalar  *a;
85 
86   INT_TYPE     *ia, *ja;
87 
88   /* Number of non-zero elements */
89   INT_TYPE     nz;
90 
91   /* Row permutaton vector*/
92   INT_TYPE     *perm;
93 
94   /* Define is matrix preserve sparce structure. */
95   MatStructure matstruc;
96 
97   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
98 
99   /* True if mkl_cpardiso function have been used. */
100   PetscBool CleanUp;
101 } Mat_MKL_CPARDISO;
102 
103 /*
104  * Copy the elements of matrix A.
105  * Input:
106  *   - Mat A: MATSEQAIJ matrix
107  *   - int shift: matrix index.
108  *     - 0 for c representation
109  *     - 1 for fortran representation
110  *   - MatReuse reuse:
111  *     - MAT_INITIAL_MATRIX: Create a new aij representation
112  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
113  * Output:
114  *   - int *nnz: Number of nonzero-elements.
115  *   - int **r pointer to i index
116  *   - int **c pointer to j elements
117  *   - MATRIXTYPE **v: Non-zero elements
118  */
119 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
120 {
121   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
122 
123   PetscFunctionBegin;
124   *v=aa->a;
125   if (reuse == MAT_INITIAL_MATRIX) {
126     *r   = (INT_TYPE*)aa->i;
127     *c   = (INT_TYPE*)aa->j;
128     *nnz = aa->nz;
129   }
130   PetscFunctionReturn(0);
131 }
132 
133 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
134 {
135   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
136   PetscErrorCode    ierr;
137   PetscInt          rstart,nz,i,j,countA,countB;
138   PetscInt          *row,*col;
139   const PetscScalar *av, *bv;
140   PetscScalar       *val;
141   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
142   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
143   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
144   PetscInt          colA_start,jB,jcol;
145 
146   PetscFunctionBegin;
147   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
148   av=aa->a; bv=bb->a;
149 
150   garray = mat->garray;
151 
152   if (reuse == MAT_INITIAL_MATRIX) {
153     nz   = aa->nz + bb->nz;
154     *nnz = nz;
155     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
156     col  = row + m + 1;
157     val  = (PetscScalar*)(col + nz);
158     *r = row; *c = col; *v = val;
159     row[0] = 0;
160   } else {
161     row = *r; col = *c; val = *v;
162   }
163 
164   nz = 0;
165   for (i=0; i<m; i++) {
166     row[i] = nz;
167     countA     = ai[i+1] - ai[i];
168     countB     = bi[i+1] - bi[i];
169     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
170     bjj        = bj + bi[i];
171 
172     /* B part, smaller col index */
173     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
174     jB         = 0;
175     for (j=0; j<countB; j++) {
176       jcol = garray[bjj[j]];
177       if (jcol > colA_start) {
178         jB = j;
179         break;
180       }
181       col[nz]   = jcol;
182       val[nz++] = *bv++;
183       if (j==countB-1) jB = countB;
184     }
185 
186     /* A part */
187     for (j=0; j<countA; j++) {
188       col[nz]   = rstart + ajj[j];
189       val[nz++] = *av++;
190     }
191 
192     /* B part, larger col index */
193     for (j=jB; j<countB; j++) {
194       col[nz]   = garray[bjj[j]];
195       val[nz++] = *bv++;
196     }
197   }
198   row[m] = nz;
199 
200   PetscFunctionReturn(0);
201 }
202 
203 /*
204  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
205  */
206 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
207 {
208   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
209   PetscErrorCode   ierr;
210 
211   PetscFunctionBegin;
212   /* Terminate instance, deallocate memories */
213   if (mat_mkl_cpardiso->CleanUp) {
214     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
215 
216     cluster_sparse_solver (
217       mat_mkl_cpardiso->pt,
218       &mat_mkl_cpardiso->maxfct,
219       &mat_mkl_cpardiso->mnum,
220       &mat_mkl_cpardiso->mtype,
221       &mat_mkl_cpardiso->phase,
222       &mat_mkl_cpardiso->n,
223       NULL,
224       NULL,
225       NULL,
226       mat_mkl_cpardiso->perm,
227       &mat_mkl_cpardiso->nrhs,
228       mat_mkl_cpardiso->iparm,
229       &mat_mkl_cpardiso->msglvl,
230       NULL,
231       NULL,
232       &mat_mkl_cpardiso->comm_mkl_cpardiso,
233       (int*)&mat_mkl_cpardiso->err);
234   }
235 
236   if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) {
237     ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr);
238   }
239   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
240   ierr = PetscFree(A->data);CHKERRQ(ierr);
241 
242   /* clear composed functions */
243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
244   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 /*
249  * Computes Ax = b
250  */
251 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
252 {
253   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
254   PetscErrorCode    ierr;
255   PetscScalar       *xarray;
256   const PetscScalar *barray;
257 
258   PetscFunctionBegin;
259   mat_mkl_cpardiso->nrhs = 1;
260   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
261   ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
262 
263   /* solve phase */
264   /*-------------*/
265   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
266   cluster_sparse_solver (
267     mat_mkl_cpardiso->pt,
268     &mat_mkl_cpardiso->maxfct,
269     &mat_mkl_cpardiso->mnum,
270     &mat_mkl_cpardiso->mtype,
271     &mat_mkl_cpardiso->phase,
272     &mat_mkl_cpardiso->n,
273     mat_mkl_cpardiso->a,
274     mat_mkl_cpardiso->ia,
275     mat_mkl_cpardiso->ja,
276     mat_mkl_cpardiso->perm,
277     &mat_mkl_cpardiso->nrhs,
278     mat_mkl_cpardiso->iparm,
279     &mat_mkl_cpardiso->msglvl,
280     (void*)barray,
281     (void*)xarray,
282     &mat_mkl_cpardiso->comm_mkl_cpardiso,
283     (int*)&mat_mkl_cpardiso->err);
284 
285   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
286 
287   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
288   ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
289   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
290   PetscFunctionReturn(0);
291 }
292 
293 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
294 {
295   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
296   PetscErrorCode   ierr;
297 
298   PetscFunctionBegin;
299 #if defined(PETSC_USE_COMPLEX)
300   mat_mkl_cpardiso->iparm[12 - 1] = 1;
301 #else
302   mat_mkl_cpardiso->iparm[12 - 1] = 2;
303 #endif
304   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
305   mat_mkl_cpardiso->iparm[12 - 1] = 0;
306   PetscFunctionReturn(0);
307 }
308 
309 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
310 {
311   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
312   PetscErrorCode    ierr;
313   PetscScalar       *xarray;
314   const PetscScalar *barray;
315 
316   PetscFunctionBegin;
317   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
318 
319   if(mat_mkl_cpardiso->nrhs > 0){
320     ierr = MatDenseGetArrayRead(B,&barray);
321     ierr = MatDenseGetArray(X,&xarray);
322 
323     /* solve phase */
324     /*-------------*/
325     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
326     cluster_sparse_solver (
327       mat_mkl_cpardiso->pt,
328       &mat_mkl_cpardiso->maxfct,
329       &mat_mkl_cpardiso->mnum,
330       &mat_mkl_cpardiso->mtype,
331       &mat_mkl_cpardiso->phase,
332       &mat_mkl_cpardiso->n,
333       mat_mkl_cpardiso->a,
334       mat_mkl_cpardiso->ia,
335       mat_mkl_cpardiso->ja,
336       mat_mkl_cpardiso->perm,
337       &mat_mkl_cpardiso->nrhs,
338       mat_mkl_cpardiso->iparm,
339       &mat_mkl_cpardiso->msglvl,
340       (void*)barray,
341       (void*)xarray,
342       &mat_mkl_cpardiso->comm_mkl_cpardiso,
343       (int*)&mat_mkl_cpardiso->err);
344     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
345     ierr = MatDenseRestoreArrayRead(B,&barray);
346     ierr = MatDenseRestoreArray(X,&xarray);
347 
348   }
349   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
350   PetscFunctionReturn(0);
351 
352 }
353 
354 /*
355  * LU Decomposition
356  */
357 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
358 {
359   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
360   PetscErrorCode   ierr;
361 
362   PetscFunctionBegin;
363   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
364   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
365 
366   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
367   cluster_sparse_solver (
368     mat_mkl_cpardiso->pt,
369     &mat_mkl_cpardiso->maxfct,
370     &mat_mkl_cpardiso->mnum,
371     &mat_mkl_cpardiso->mtype,
372     &mat_mkl_cpardiso->phase,
373     &mat_mkl_cpardiso->n,
374     mat_mkl_cpardiso->a,
375     mat_mkl_cpardiso->ia,
376     mat_mkl_cpardiso->ja,
377     mat_mkl_cpardiso->perm,
378     &mat_mkl_cpardiso->nrhs,
379     mat_mkl_cpardiso->iparm,
380     &mat_mkl_cpardiso->msglvl,
381     NULL,
382     NULL,
383     &mat_mkl_cpardiso->comm_mkl_cpardiso,
384     &mat_mkl_cpardiso->err);
385   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
386 
387   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
388   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
389   PetscFunctionReturn(0);
390 }
391 
392 /* Sets mkl_cpardiso options from the options database */
393 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
394 {
395   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
396   PetscErrorCode      ierr;
397   PetscInt            icntl;
398   PetscBool           flg;
399   int                 threads;
400 
401   PetscFunctionBegin;
402   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
403   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr);
404   if (flg) mkl_set_num_threads(threads);
405 
406   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr);
407   if (flg) mat_mkl_cpardiso->maxfct = icntl;
408 
409   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr);
410   if (flg) mat_mkl_cpardiso->mnum = icntl;
411 
412   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr);
413   if (flg) mat_mkl_cpardiso->msglvl = icntl;
414 
415   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr);
416   if(flg){
417     mat_mkl_cpardiso->mtype = icntl;
418 #if defined(PETSC_USE_REAL_SINGLE)
419     mat_mkl_cpardiso->iparm[27] = 1;
420 #else
421     mat_mkl_cpardiso->iparm[27] = 0;
422 #endif
423     mat_mkl_cpardiso->iparm[34] = 1;
424   }
425   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr);
426 
427   if(flg && icntl != 0){
428     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr);
429     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
430 
431     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr);
432     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
433 
434     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr);
435     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
436 
437     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr);
438     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
439 
440     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr);
441     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
442 
443     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr);
444     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
445 
446     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr);
447     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
448 
449     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr);
450     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
451 
452     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
453       &flg);CHKERRQ(ierr);
454     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
455 
456     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
457       &flg);CHKERRQ(ierr);
458     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
459 
460     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr);
461     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
462 
463     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr);
464     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
465 
466     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr);
467     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
468 
469     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr);
470     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
471 
472     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr);
473     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
474 
475     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr);
476     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
477 
478     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr);
479     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
480 
481     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr);
482     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
483   }
484 
485   ierr = PetscOptionsEnd();CHKERRQ(ierr);
486   PetscFunctionReturn(0);
487 }
488 
489 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
490 {
491   PetscErrorCode  ierr;
492   PetscMPIInt     size;
493 
494   PetscFunctionBegin;
495 
496   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
497   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
498 
499   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
500   mat_mkl_cpardiso->maxfct = 1;
501   mat_mkl_cpardiso->mnum = 1;
502   mat_mkl_cpardiso->n = A->rmap->N;
503   mat_mkl_cpardiso->msglvl = 0;
504   mat_mkl_cpardiso->nrhs = 1;
505   mat_mkl_cpardiso->err = 0;
506   mat_mkl_cpardiso->phase = -1;
507 #if defined(PETSC_USE_COMPLEX)
508   mat_mkl_cpardiso->mtype = 13;
509 #else
510   mat_mkl_cpardiso->mtype = 11;
511 #endif
512 
513 #if defined(PETSC_USE_REAL_SINGLE)
514   mat_mkl_cpardiso->iparm[27] = 1;
515 #else
516   mat_mkl_cpardiso->iparm[27] = 0;
517 #endif
518 
519   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
520 
521   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
522   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
523   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
524   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
525   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
526   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
527   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
528   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
529   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
530   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
531 
532   mat_mkl_cpardiso->iparm[39] = 0;
533   if (size > 1) {
534     mat_mkl_cpardiso->iparm[39] = 2;
535     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
536     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
537   }
538   mat_mkl_cpardiso->perm = 0;
539   PetscFunctionReturn(0);
540 }
541 
542 /*
543  * Symbolic decomposition. Mkl_Pardiso analysis phase.
544  */
545 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
546 {
547   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
548   PetscErrorCode  ierr;
549 
550   PetscFunctionBegin;
551   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
552 
553   /* Set MKL_CPARDISO options from the options database */
554   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
555   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
556 
557   mat_mkl_cpardiso->n = A->rmap->N;
558 
559   /* analysis phase */
560   /*----------------*/
561   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
562 
563   cluster_sparse_solver (
564     mat_mkl_cpardiso->pt,
565     &mat_mkl_cpardiso->maxfct,
566     &mat_mkl_cpardiso->mnum,
567     &mat_mkl_cpardiso->mtype,
568     &mat_mkl_cpardiso->phase,
569     &mat_mkl_cpardiso->n,
570     mat_mkl_cpardiso->a,
571     mat_mkl_cpardiso->ia,
572     mat_mkl_cpardiso->ja,
573     mat_mkl_cpardiso->perm,
574     &mat_mkl_cpardiso->nrhs,
575     mat_mkl_cpardiso->iparm,
576     &mat_mkl_cpardiso->msglvl,
577     NULL,
578     NULL,
579     &mat_mkl_cpardiso->comm_mkl_cpardiso,
580     (int*)&mat_mkl_cpardiso->err);
581 
582   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
583 
584   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
585   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
586   F->ops->solve           = MatSolve_MKL_CPARDISO;
587   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
588   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
589   PetscFunctionReturn(0);
590 }
591 
592 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
593 {
594   PetscErrorCode    ierr;
595   PetscBool         iascii;
596   PetscViewerFormat format;
597   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
598   PetscInt          i;
599 
600   PetscFunctionBegin;
601   /* check if matrix is mkl_cpardiso type */
602   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
603 
604   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
605   if (iascii) {
606     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
607     if (format == PETSC_VIEWER_ASCII_INFO) {
608       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
609       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
610       for(i = 1; i <= 64; i++){
611         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
612       }
613       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
614       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
615       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
616       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
617       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
618       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
619     }
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
625 {
626   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data;
627 
628   PetscFunctionBegin;
629   info->block_size        = 1.0;
630   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
631   info->nz_unneeded       = 0.0;
632   info->assemblies        = 0.0;
633   info->mallocs           = 0.0;
634   info->memory            = 0.0;
635   info->fill_ratio_given  = 0;
636   info->fill_ratio_needed = 0;
637   info->factor_mallocs    = 0;
638   PetscFunctionReturn(0);
639 }
640 
641 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
642 {
643   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data;
644 
645   PetscFunctionBegin;
646   if(icntl <= 64){
647     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
648   } else {
649     if(icntl == 65) mkl_set_num_threads((int)ival);
650     else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival;
651     else if(icntl == 67) mat_mkl_cpardiso->mnum = ival;
652     else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival;
653     else if(icntl == 69){
654       mat_mkl_cpardiso->mtype = ival;
655 #if defined(PETSC_USE_REAL_SINGLE)
656       mat_mkl_cpardiso->iparm[27] = 1;
657 #else
658       mat_mkl_cpardiso->iparm[27] = 0;
659 #endif
660       mat_mkl_cpardiso->iparm[34] = 1;
661     }
662   }
663   PetscFunctionReturn(0);
664 }
665 
666 /*@
667   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
668 
669    Logically Collective on Mat
670 
671    Input Parameters:
672 +  F - the factored matrix obtained by calling MatGetFactor()
673 .  icntl - index of Mkl_Pardiso parameter
674 -  ival - value of Mkl_Pardiso parameter
675 
676   Options Database:
677 .   -mat_mkl_cpardiso_<icntl> <ival>
678 
679    Level: Intermediate
680 
681    Notes: This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options
682           database approach when working with TS, SNES, or KSP.
683 
684    References:
685 .      Mkl_Pardiso Users' Guide
686 
687 .seealso: MatGetFactor()
688 @*/
689 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
690 {
691   PetscErrorCode ierr;
692 
693   PetscFunctionBegin;
694   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
695   PetscFunctionReturn(0);
696 }
697 
698 static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type)
699 {
700   PetscFunctionBegin;
701   *type = MATSOLVERMKL_CPARDISO;
702   PetscFunctionReturn(0);
703 }
704 
705 /* MatGetFactor for MPI AIJ matrices */
706 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
707 {
708   Mat              B;
709   PetscErrorCode   ierr;
710   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
711   PetscBool        isSeqAIJ;
712 
713   PetscFunctionBegin;
714   /* Create the factorization matrix */
715 
716   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
717   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
718   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
719   ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr);
720   ierr = MatSetUp(B);CHKERRQ(ierr);
721 
722   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
723 
724   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
725   else          mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
726 
727   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
728   B->ops->destroy = MatDestroy_MKL_CPARDISO;
729 
730   B->ops->view    = MatView_MKL_CPARDISO;
731   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
732 
733   B->factortype   = ftype;
734   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
735 
736   B->data = mat_mkl_cpardiso;
737 
738   /* set solvertype */
739   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
740   ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr);
741 
742   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);CHKERRQ(ierr);
743   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
744   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
745 
746   *F = B;
747   PetscFunctionReturn(0);
748 }
749 
750 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void)
751 {
752   PetscErrorCode ierr;
753 
754   PetscFunctionBegin;
755   ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
756   ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
757   PetscFunctionReturn(0);
758 }
759