xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision 660873c63b73c9c42ba8d553c05bcbff94725f98)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>                       /*I "petscmat.h" I*/
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #include <mkl.h>
12 #include <mkl_cluster_sparse_solver.h>
13 
14 /*
15  *  Possible mkl_cpardiso phases that controls the execution of the solver.
16  *  For more information check mkl_cpardiso manual.
17  */
18 #define JOB_ANALYSIS 11
19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
21 #define JOB_NUMERICAL_FACTORIZATION 22
22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
27 #define JOB_RELEASE_OF_LU_MEMORY 0
28 #define JOB_RELEASE_OF_ALL_MEMORY -1
29 
30 #define IPARM_SIZE 64
31 #define INT_TYPE MKL_INT
32 
33 static const char *Err_MSG_CPardiso(int errNo){
34   switch (errNo) {
35     case -1:
36       return "input inconsistent"; break;
37     case -2:
38       return "not enough memory"; break;
39     case -3:
40       return "reordering problem"; break;
41     case -4:
42       return "zero pivot, numerical factorization or iterative refinement problem"; break;
43     case -5:
44       return "unclassified (internal) error"; break;
45     case -6:
46       return "preordering failed (matrix types 11, 13 only)"; break;
47     case -7:
48       return "diagonal matrix problem"; break;
49     case -8:
50       return "32-bit integer overflow problem"; break;
51     case -9:
52       return "not enough memory for OOC"; break;
53     case -10:
54       return "problems with opening OOC temporary files"; break;
55     case -11:
56       return "read/write problems with the OOC data file"; break;
57     default :
58       return "unknown error";
59   }
60 }
61 
62 /*
63  *  Internal data structure.
64  *  For more information check mkl_cpardiso manual.
65  */
66 
67 typedef struct {
68 
69   /* Configuration vector */
70   INT_TYPE     iparm[IPARM_SIZE];
71 
72   /*
73    * Internal mkl_cpardiso memory location.
74    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
75    */
76   void         *pt[IPARM_SIZE];
77 
78   MPI_Comm     comm_mkl_cpardiso;
79 
80   /* Basic mkl_cpardiso info*/
81   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
82 
83   /* Matrix structure */
84   PetscScalar  *a;
85 
86   INT_TYPE     *ia, *ja;
87 
88   /* Number of non-zero elements */
89   INT_TYPE     nz;
90 
91   /* Row permutaton vector*/
92   INT_TYPE     *perm;
93 
94   /* Define is matrix preserve sparce structure. */
95   MatStructure matstruc;
96 
97   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
98 
99   /* True if mkl_cpardiso function have been used. */
100   PetscBool CleanUp;
101 } Mat_MKL_CPARDISO;
102 
103 /*
104  * Copy the elements of matrix A.
105  * Input:
106  *   - Mat A: MATSEQAIJ matrix
107  *   - int shift: matrix index.
108  *     - 0 for c representation
109  *     - 1 for fortran representation
110  *   - MatReuse reuse:
111  *     - MAT_INITIAL_MATRIX: Create a new aij representation
112  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
113  * Output:
114  *   - int *nnz: Number of nonzero-elements.
115  *   - int **r pointer to i index
116  *   - int **c pointer to j elements
117  *   - MATRIXTYPE **v: Non-zero elements
118  */
119 #undef __FUNCT__
120 #define __FUNCT__ "MatCopy_seqaij_seqaij_MKL_CPARDISO"
121 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
122 {
123   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
124 
125   PetscFunctionBegin;
126   *v=aa->a;
127   if (reuse == MAT_INITIAL_MATRIX) {
128     *r   = (INT_TYPE*)aa->i;
129     *c   = (INT_TYPE*)aa->j;
130     *nnz = aa->nz;
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO"
137 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
138 {
139   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
140   PetscErrorCode    ierr;
141   PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
142   PetscInt          *row,*col;
143   const PetscScalar *av, *bv,*v1,*v2;
144   PetscScalar       *val;
145   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
146   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
147   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
148   PetscInt          nn, colA_start,jB,jcol;
149 
150   PetscFunctionBegin;
151   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
152   av=aa->a; bv=bb->a;
153 
154   garray = mat->garray;
155 
156   if (reuse == MAT_INITIAL_MATRIX) {
157     nz   = aa->nz + bb->nz;
158     *nnz = nz;
159     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
160     col  = row + m + 1;
161     val  = (PetscScalar*)(col + nz);
162     *r = row; *c = col; *v = val;
163     row[0] = 0;
164   } else {
165     row = *r; col = *c; val = *v;
166   }
167 
168   nz = 0;
169   for (i=0; i<m; i++) {
170     row[i] = nz;
171     countA     = ai[i+1] - ai[i];
172     countB     = bi[i+1] - bi[i];
173     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
174     bjj        = bj + bi[i];
175 
176     /* B part, smaller col index */
177     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
178     jB         = 0;
179     for (j=0; j<countB; j++) {
180       jcol = garray[bjj[j]];
181       if (jcol > colA_start) {
182         jB = j;
183         break;
184       }
185       col[nz]   = jcol;
186       val[nz++] = *bv++;
187       if (j==countB-1) jB = countB;
188     }
189 
190     /* A part */
191     for (j=0; j<countA; j++) {
192       col[nz]   = rstart + ajj[j];
193       val[nz++] = *av++;
194     }
195 
196     /* B part, larger col index */
197     for (j=jB; j<countB; j++) {
198       col[nz]   = garray[bjj[j]];
199       val[nz++] = *bv++;
200     }
201   }
202   row[m] = nz;
203 
204   PetscFunctionReturn(0);
205 }
206 
207 /*
208  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
209  */
210 #undef __FUNCT__
211 #define __FUNCT__ "MatDestroy_MKL_CPARDISO"
212 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
213 {
214   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
215   PetscErrorCode   ierr;
216 
217   PetscFunctionBegin;
218   /* Terminate instance, deallocate memories */
219   if (mat_mkl_cpardiso->CleanUp) {
220     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
221 
222     cluster_sparse_solver (
223       mat_mkl_cpardiso->pt,
224       &mat_mkl_cpardiso->maxfct,
225       &mat_mkl_cpardiso->mnum,
226       &mat_mkl_cpardiso->mtype,
227       &mat_mkl_cpardiso->phase,
228       &mat_mkl_cpardiso->n,
229       NULL,
230       NULL,
231       NULL,
232       mat_mkl_cpardiso->perm,
233       &mat_mkl_cpardiso->nrhs,
234       mat_mkl_cpardiso->iparm,
235       &mat_mkl_cpardiso->msglvl,
236       NULL,
237       NULL,
238       &mat_mkl_cpardiso->comm_mkl_cpardiso,
239       &mat_mkl_cpardiso->err);
240   }
241 
242   ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr);
243   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
244   ierr = PetscFree(A->data);CHKERRQ(ierr);
245 
246   /* clear composed functions */
247   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
248   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
249   PetscFunctionReturn(0);
250 }
251 
252 /*
253  * Computes Ax = b
254  */
255 #undef __FUNCT__
256 #define __FUNCT__ "MatSolve_MKL_CPARDISO"
257 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
258 {
259   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
260   PetscErrorCode    ierr;
261   PetscScalar       *xarray;
262   const PetscScalar *barray;
263 
264   PetscFunctionBegin;
265   mat_mkl_cpardiso->nrhs = 1;
266   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
267   ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
268 
269   /* solve phase */
270   /*-------------*/
271   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
272   cluster_sparse_solver (
273     mat_mkl_cpardiso->pt,
274     &mat_mkl_cpardiso->maxfct,
275     &mat_mkl_cpardiso->mnum,
276     &mat_mkl_cpardiso->mtype,
277     &mat_mkl_cpardiso->phase,
278     &mat_mkl_cpardiso->n,
279     mat_mkl_cpardiso->a,
280     mat_mkl_cpardiso->ia,
281     mat_mkl_cpardiso->ja,
282     mat_mkl_cpardiso->perm,
283     &mat_mkl_cpardiso->nrhs,
284     mat_mkl_cpardiso->iparm,
285     &mat_mkl_cpardiso->msglvl,
286     (void*)barray,
287     (void*)xarray,
288     &mat_mkl_cpardiso->comm_mkl_cpardiso,
289     &mat_mkl_cpardiso->err);
290 
291   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
292 
293   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
294   ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
295   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
296   PetscFunctionReturn(0);
297 }
298 
299 #undef __FUNCT__
300 #define __FUNCT__ "MatSolveTranspose_MKL_CPARDISO"
301 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
302 {
303   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
304   PetscErrorCode   ierr;
305 
306   PetscFunctionBegin;
307 #if defined(PETSC_USE_COMPLEX)
308   mat_mkl_cpardiso->iparm[12 - 1] = 1;
309 #else
310   mat_mkl_cpardiso->iparm[12 - 1] = 2;
311 #endif
312   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
313   mat_mkl_cpardiso->iparm[12 - 1] = 0;
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "MatMatSolve_MKL_CPARDISO"
319 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
320 {
321   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
322   PetscErrorCode    ierr;
323   PetscScalar       *barray, *xarray;
324   PetscBool         flg;
325 
326   PetscFunctionBegin;
327   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr);
328   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
329   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr);
330   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
331 
332   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
333 
334   if(mat_mkl_cpardiso->nrhs > 0){
335     ierr = MatDenseGetArray(B,&barray);
336     ierr = MatDenseGetArray(X,&xarray);
337 
338     /* solve phase */
339     /*-------------*/
340     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
341     cluster_sparse_solver (
342       mat_mkl_cpardiso->pt,
343       &mat_mkl_cpardiso->maxfct,
344       &mat_mkl_cpardiso->mnum,
345       &mat_mkl_cpardiso->mtype,
346       &mat_mkl_cpardiso->phase,
347       &mat_mkl_cpardiso->n,
348       mat_mkl_cpardiso->a,
349       mat_mkl_cpardiso->ia,
350       mat_mkl_cpardiso->ja,
351       mat_mkl_cpardiso->perm,
352       &mat_mkl_cpardiso->nrhs,
353       mat_mkl_cpardiso->iparm,
354       &mat_mkl_cpardiso->msglvl,
355       (void*)barray,
356       (void*)xarray,
357       &mat_mkl_cpardiso->comm_mkl_cpardiso,
358       &mat_mkl_cpardiso->err);
359     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
360   }
361   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
362   PetscFunctionReturn(0);
363 
364 }
365 
366 /*
367  * LU Decomposition
368  */
369 #undef __FUNCT__
370 #define __FUNCT__ "MatFactorNumeric_MKL_CPARDISO"
371 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
372 {
373   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
374   PetscErrorCode   ierr;
375 
376   PetscFunctionBegin;
377   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
378   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
379 
380   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
381   cluster_sparse_solver (
382     mat_mkl_cpardiso->pt,
383     &mat_mkl_cpardiso->maxfct,
384     &mat_mkl_cpardiso->mnum,
385     &mat_mkl_cpardiso->mtype,
386     &mat_mkl_cpardiso->phase,
387     &mat_mkl_cpardiso->n,
388     mat_mkl_cpardiso->a,
389     mat_mkl_cpardiso->ia,
390     mat_mkl_cpardiso->ja,
391     mat_mkl_cpardiso->perm,
392     &mat_mkl_cpardiso->nrhs,
393     mat_mkl_cpardiso->iparm,
394     &mat_mkl_cpardiso->msglvl,
395     NULL,
396     NULL,
397     &mat_mkl_cpardiso->comm_mkl_cpardiso,
398     &mat_mkl_cpardiso->err);
399   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
400 
401   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
402   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
403   PetscFunctionReturn(0);
404 }
405 
406 /* Sets mkl_cpardiso options from the options database */
407 #undef __FUNCT__
408 #define __FUNCT__ "PetscSetMKL_CPARDISOFromOptions"
409 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
410 {
411   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
412   PetscErrorCode      ierr;
413   PetscInt            icntl;
414   PetscBool           flg;
415   int                 pt[IPARM_SIZE], threads;
416 
417   PetscFunctionBegin;
418   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
419   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr);
420   if (flg) mkl_set_num_threads(threads);
421 
422   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr);
423   if (flg) mat_mkl_cpardiso->maxfct = icntl;
424 
425   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr);
426   if (flg) mat_mkl_cpardiso->mnum = icntl;
427 
428   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr);
429   if (flg) mat_mkl_cpardiso->msglvl = icntl;
430 
431   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr);
432   if(flg){
433     mat_mkl_cpardiso->mtype = icntl;
434 #if defined(PETSC_USE_REAL_SINGLE)
435     mat_mkl_cpardiso->iparm[27] = 1;
436 #else
437     mat_mkl_cpardiso->iparm[27] = 0;
438 #endif
439     mat_mkl_cpardiso->iparm[34] = 1;
440   }
441   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr);
442 
443   if(flg && icntl != 0){
444     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr);
445     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
446 
447     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr);
448     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
449 
450     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr);
451     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
452 
453     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr);
454     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
455 
456     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr);
457     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
458 
459     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr);
460     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
461 
462     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr);
463     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
464 
465     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr);
466     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
467 
468     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
469       &flg);CHKERRQ(ierr);
470     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
471 
472     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
473       &flg);CHKERRQ(ierr);
474     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
475 
476     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr);
477     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
478 
479     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr);
480     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
481 
482     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr);
483     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
484 
485     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr);
486     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
487 
488     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr);
489     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
490 
491     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr);
492     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
493 
494     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr);
495     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
496 
497     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr);
498     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
499   }
500 
501   ierr = PetscOptionsEnd();CHKERRQ(ierr);
502   PetscFunctionReturn(0);
503 }
504 
505 #undef __FUNCT__
506 #define __FUNCT__ "PetscInitialize_MKL_CPARDISO"
507 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
508 {
509   PetscErrorCode  ierr;
510   PetscInt        i;
511   PetscMPIInt     size;
512 
513   PetscFunctionBegin;
514 
515   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
516   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
517 
518   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
519   mat_mkl_cpardiso->maxfct = 1;
520   mat_mkl_cpardiso->mnum = 1;
521   mat_mkl_cpardiso->n = A->rmap->N;
522   mat_mkl_cpardiso->msglvl = 0;
523   mat_mkl_cpardiso->nrhs = 1;
524   mat_mkl_cpardiso->err = 0;
525   mat_mkl_cpardiso->phase = -1;
526 #if defined(PETSC_USE_COMPLEX)
527   mat_mkl_cpardiso->mtype = 13;
528 #else
529   mat_mkl_cpardiso->mtype = 11;
530 #endif
531 
532 #if defined(PETSC_USE_REAL_SINGLE)
533   mat_mkl_cpardiso->iparm[27] = 1;
534 #else
535   mat_mkl_cpardiso->iparm[27] = 0;
536 #endif
537 
538   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
539 
540   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
541   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
542   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
543   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
544   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
545   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
546   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
547   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
548   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
549   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
550 
551   mat_mkl_cpardiso->iparm[39] = 0;
552   if (size > 1) {
553     mat_mkl_cpardiso->iparm[39] = 2;
554     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
555     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
556   }
557   mat_mkl_cpardiso->perm = 0;
558   PetscFunctionReturn(0);
559 }
560 
561 /*
562  * Symbolic decomposition. Mkl_Pardiso analysis phase.
563  */
564 #undef __FUNCT__
565 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_CPARDISO"
566 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
567 {
568   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
569   PetscErrorCode  ierr;
570 
571   PetscFunctionBegin;
572   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
573 
574   /* Set MKL_CPARDISO options from the options database */
575   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
576   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
577 
578   mat_mkl_cpardiso->n = A->rmap->N;
579 
580   /* analysis phase */
581   /*----------------*/
582   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
583 
584   cluster_sparse_solver (
585     mat_mkl_cpardiso->pt,
586     &mat_mkl_cpardiso->maxfct,
587     &mat_mkl_cpardiso->mnum,
588     &mat_mkl_cpardiso->mtype,
589     &mat_mkl_cpardiso->phase,
590     &mat_mkl_cpardiso->n,
591     mat_mkl_cpardiso->a,
592     mat_mkl_cpardiso->ia,
593     mat_mkl_cpardiso->ja,
594     mat_mkl_cpardiso->perm,
595     &mat_mkl_cpardiso->nrhs,
596     mat_mkl_cpardiso->iparm,
597     &mat_mkl_cpardiso->msglvl,
598     NULL,
599     NULL,
600     &mat_mkl_cpardiso->comm_mkl_cpardiso,
601     &mat_mkl_cpardiso->err);
602 
603   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
604 
605   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
606   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
607   F->ops->solve           = MatSolve_MKL_CPARDISO;
608   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
609   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNCT__
614 #define __FUNCT__ "MatView_MKL_CPARDISO"
615 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
616 {
617   PetscErrorCode    ierr;
618   PetscBool         iascii;
619   PetscViewerFormat format;
620   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
621   PetscInt          i;
622 
623   PetscFunctionBegin;
624   /* check if matrix is mkl_cpardiso type */
625   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
626 
627   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
628   if (iascii) {
629     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
630     if (format == PETSC_VIEWER_ASCII_INFO) {
631       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
632       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
633       for(i = 1; i <= 64; i++){
634         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
635       }
636       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
637       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
638       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
639       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
640       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
641       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
642     }
643   }
644   PetscFunctionReturn(0);
645 }
646 
647 #undef __FUNCT__
648 #define __FUNCT__ "MatGetInfo_MKL_CPARDISO"
649 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
650 {
651   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data;
652 
653   PetscFunctionBegin;
654   info->block_size        = 1.0;
655   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
656   info->nz_unneeded       = 0.0;
657   info->assemblies        = 0.0;
658   info->mallocs           = 0.0;
659   info->memory            = 0.0;
660   info->fill_ratio_given  = 0;
661   info->fill_ratio_needed = 0;
662   info->factor_mallocs    = 0;
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatMkl_CPardisoSetCntl_MKL_CPARDISO"
668 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
669 {
670   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data;
671 
672   PetscFunctionBegin;
673   if(icntl <= 64){
674     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
675   } else {
676     if(icntl == 65) mkl_set_num_threads((int)ival);
677     else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival;
678     else if(icntl == 67) mat_mkl_cpardiso->mnum = ival;
679     else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival;
680     else if(icntl == 69){
681       int pt[IPARM_SIZE];
682       mat_mkl_cpardiso->mtype = ival;
683 #if defined(PETSC_USE_REAL_SINGLE)
684       mat_mkl_cpardiso->iparm[27] = 1;
685 #else
686       mat_mkl_cpardiso->iparm[27] = 0;
687 #endif
688       mat_mkl_cpardiso->iparm[34] = 1;
689     }
690   }
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatMkl_CPardisoSetCntl"
696 /*@
697   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
698 
699    Logically Collective on Mat
700 
701    Input Parameters:
702 +  F - the factored matrix obtained by calling MatGetFactor()
703 .  icntl - index of Mkl_Pardiso parameter
704 -  ival - value of Mkl_Pardiso parameter
705 
706   Options Database:
707 .   -mat_mkl_cpardiso_<icntl> <ival>
708 
709    Level: beginner
710 
711    References:
712 .      Mkl_Pardiso Users' Guide
713 
714 .seealso: MatGetFactor()
715 @*/
716 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
717 {
718   PetscErrorCode ierr;
719 
720   PetscFunctionBegin;
721   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_cpardiso"
727 static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type)
728 {
729   PetscFunctionBegin;
730   *type = MATSOLVERMKL_CPARDISO;
731   PetscFunctionReturn(0);
732 }
733 
734 /* MatGetFactor for MPI AIJ matrices */
735 #undef __FUNCT__
736 #define __FUNCT__ "MatGetFactor_mpiaij_mkl_cpardiso"
737 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
738 {
739   Mat              B;
740   PetscErrorCode   ierr;
741   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
742   PetscBool        isSeqAIJ;
743 
744   PetscFunctionBegin;
745   /* Create the factorization matrix */
746 
747   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
748   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
749   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
750   ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr);
751   ierr = MatSetUp(B);CHKERRQ(ierr);
752 
753   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
754 
755   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
756   else          mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
757 
758   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
759   B->ops->destroy = MatDestroy_MKL_CPARDISO;
760 
761   B->ops->view    = MatView_MKL_CPARDISO;
762   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
763 
764   B->factortype   = ftype;
765   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
766 
767   B->data = mat_mkl_cpardiso;
768 
769   /* set solvertype */
770   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
771   ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr);
772 
773   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);CHKERRQ(ierr);
774   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
775   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
776 
777   *F = B;
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatSolverPackageRegister_MKL_CPardiso"
783 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void)
784 {
785   PetscErrorCode ierr;
786 
787   PetscFunctionBegin;
788   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
789   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
790   PetscFunctionReturn(0);
791 }
792