xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision b2c5b070e1e9d4e3e2dc30d64b22ae7eaf7f82f3)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>                       /*I "petscmat.h" I*/
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #include <mkl.h>
12 #include <mkl_cluster_sparse_solver.h>
13 
14 /*
15  *  Possible mkl_cpardiso phases that controls the execution of the solver.
16  *  For more information check mkl_cpardiso manual.
17  */
18 #define JOB_ANALYSIS 11
19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
21 #define JOB_NUMERICAL_FACTORIZATION 22
22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
27 #define JOB_RELEASE_OF_LU_MEMORY 0
28 #define JOB_RELEASE_OF_ALL_MEMORY -1
29 
30 #define IPARM_SIZE 64
31 #define INT_TYPE MKL_INT
32 
33 static const char *Err_MSG_CPardiso(int errNo){
34   switch (errNo) {
35     case -1:
36       return "input inconsistent"; break;
37     case -2:
38       return "not enough memory"; break;
39     case -3:
40       return "reordering problem"; break;
41     case -4:
42       return "zero pivot, numerical factorization or iterative refinement problem"; break;
43     case -5:
44       return "unclassified (internal) error"; break;
45     case -6:
46       return "preordering failed (matrix types 11, 13 only)"; break;
47     case -7:
48       return "diagonal matrix problem"; break;
49     case -8:
50       return "32-bit integer overflow problem"; break;
51     case -9:
52       return "not enough memory for OOC"; break;
53     case -10:
54       return "problems with opening OOC temporary files"; break;
55     case -11:
56       return "read/write problems with the OOC data file"; break;
57     default :
58       return "unknown error";
59   }
60 }
61 
62 /*
63  *  Internal data structure.
64  *  For more information check mkl_cpardiso manual.
65  */
66 
67 typedef struct {
68 
69   /* Configuration vector */
70   INT_TYPE     iparm[IPARM_SIZE];
71 
72   /*
73    * Internal mkl_cpardiso memory location.
74    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
75    */
76   void         *pt[IPARM_SIZE];
77 
78   MPI_Comm     comm_mkl_cpardiso;
79 
80   /* Basic mkl_cpardiso info*/
81   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
82 
83   /* Matrix structure */
84   PetscScalar  *a;
85 
86   INT_TYPE     *ia, *ja;
87 
88   /* Number of non-zero elements */
89   INT_TYPE     nz;
90 
91   /* Row permutaton vector*/
92   INT_TYPE     *perm;
93 
94   /* Define is matrix preserve sparce structure. */
95   MatStructure matstruc;
96 
97   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
98 
99   /* True if mkl_cpardiso function have been used. */
100   PetscBool CleanUp;
101 } Mat_MKL_CPARDISO;
102 
103 /*
104  * Copy the elements of matrix A.
105  * Input:
106  *   - Mat A: MATSEQAIJ matrix
107  *   - int shift: matrix index.
108  *     - 0 for c representation
109  *     - 1 for fortran representation
110  *   - MatReuse reuse:
111  *     - MAT_INITIAL_MATRIX: Create a new aij representation
112  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
113  * Output:
114  *   - int *nnz: Number of nonzero-elements.
115  *   - int **r pointer to i index
116  *   - int **c pointer to j elements
117  *   - MATRIXTYPE **v: Non-zero elements
118  */
119 #undef __FUNCT__
120 #define __FUNCT__ "MatCopy_seqaij_seqaij_MKL_CPARDISO"
121 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
122 {
123   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
124 
125   PetscFunctionBegin;
126   *v=aa->a;
127   if (reuse == MAT_INITIAL_MATRIX) {
128     *r   = (INT_TYPE*)aa->i;
129     *c   = (INT_TYPE*)aa->j;
130     *nnz = aa->nz;
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO"
137 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
138 {
139   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
140   PetscErrorCode    ierr;
141   PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
142   PetscInt          *row,*col;
143   const PetscScalar *av, *bv,*v1,*v2;
144   PetscScalar       *val;
145   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
146   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
147   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
148   PetscInt          nn, colA_start,jB,jcol;
149 
150   PetscFunctionBegin;
151   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
152   av=aa->a; bv=bb->a;
153 
154   garray = mat->garray;
155 
156   if (reuse == MAT_INITIAL_MATRIX) {
157     nz   = aa->nz + bb->nz;
158     *nnz = nz;
159     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
160     col  = row + m + 1;
161     val  = (PetscScalar*)(col + nz);
162     *r = row; *c = col; *v = val;
163     row[0] = 0;
164   } else {
165     row = *r; col = *c; val = *v;
166   }
167 
168   nz = 0;
169   for (i=0; i<m; i++) {
170     row[i] = nz;
171     countA     = ai[i+1] - ai[i];
172     countB     = bi[i+1] - bi[i];
173     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
174     bjj        = bj + bi[i];
175 
176     /* B part, smaller col index */
177     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
178     jB         = 0;
179     for (j=0; j<countB; j++) {
180       jcol = garray[bjj[j]];
181       if (jcol > colA_start) {
182         jB = j;
183         break;
184       }
185       col[nz]   = jcol;
186       val[nz++] = *bv++;
187       if (j==countB-1) jB = countB;
188     }
189 
190     /* A part */
191     for (j=0; j<countA; j++) {
192       col[nz]   = rstart + ajj[j];
193       val[nz++] = *av++;
194     }
195 
196     /* B part, larger col index */
197     for (j=jB; j<countB; j++) {
198       col[nz]   = garray[bjj[j]];
199       val[nz++] = *bv++;
200     }
201   }
202   row[m] = nz;
203 
204   PetscFunctionReturn(0);
205 }
206 
207 /*
208  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
209  */
210 #undef __FUNCT__
211 #define __FUNCT__ "MatDestroy_MKL_CPARDISO"
212 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
213 {
214   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
215   PetscErrorCode   ierr;
216 
217   PetscFunctionBegin;
218   /* Terminate instance, deallocate memories */
219   if (mat_mkl_cpardiso->CleanUp) {
220     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
221 
222     cluster_sparse_solver (
223       mat_mkl_cpardiso->pt,
224       &mat_mkl_cpardiso->maxfct,
225       &mat_mkl_cpardiso->mnum,
226       &mat_mkl_cpardiso->mtype,
227       &mat_mkl_cpardiso->phase,
228       &mat_mkl_cpardiso->n,
229       NULL,
230       NULL,
231       NULL,
232       mat_mkl_cpardiso->perm,
233       &mat_mkl_cpardiso->nrhs,
234       mat_mkl_cpardiso->iparm,
235       &mat_mkl_cpardiso->msglvl,
236       NULL,
237       NULL,
238       &mat_mkl_cpardiso->comm_mkl_cpardiso,
239       &mat_mkl_cpardiso->err);
240   }
241 
242   if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) {
243     ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr);
244   }
245   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
246   ierr = PetscFree(A->data);CHKERRQ(ierr);
247 
248   /* clear composed functions */
249   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
250   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 /*
255  * Computes Ax = b
256  */
257 #undef __FUNCT__
258 #define __FUNCT__ "MatSolve_MKL_CPARDISO"
259 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
260 {
261   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
262   PetscErrorCode    ierr;
263   PetscScalar       *xarray;
264   const PetscScalar *barray;
265 
266   PetscFunctionBegin;
267   mat_mkl_cpardiso->nrhs = 1;
268   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
269   ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
270 
271   /* solve phase */
272   /*-------------*/
273   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
274   cluster_sparse_solver (
275     mat_mkl_cpardiso->pt,
276     &mat_mkl_cpardiso->maxfct,
277     &mat_mkl_cpardiso->mnum,
278     &mat_mkl_cpardiso->mtype,
279     &mat_mkl_cpardiso->phase,
280     &mat_mkl_cpardiso->n,
281     mat_mkl_cpardiso->a,
282     mat_mkl_cpardiso->ia,
283     mat_mkl_cpardiso->ja,
284     mat_mkl_cpardiso->perm,
285     &mat_mkl_cpardiso->nrhs,
286     mat_mkl_cpardiso->iparm,
287     &mat_mkl_cpardiso->msglvl,
288     (void*)barray,
289     (void*)xarray,
290     &mat_mkl_cpardiso->comm_mkl_cpardiso,
291     &mat_mkl_cpardiso->err);
292 
293   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
294 
295   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
296   ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
297   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSolveTranspose_MKL_CPARDISO"
303 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
304 {
305   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
306   PetscErrorCode   ierr;
307 
308   PetscFunctionBegin;
309 #if defined(PETSC_USE_COMPLEX)
310   mat_mkl_cpardiso->iparm[12 - 1] = 1;
311 #else
312   mat_mkl_cpardiso->iparm[12 - 1] = 2;
313 #endif
314   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
315   mat_mkl_cpardiso->iparm[12 - 1] = 0;
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNCT__
320 #define __FUNCT__ "MatMatSolve_MKL_CPARDISO"
321 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
322 {
323   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
324   PetscErrorCode    ierr;
325   PetscScalar       *barray, *xarray;
326   PetscBool         flg;
327 
328   PetscFunctionBegin;
329   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr);
330   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
331   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr);
332   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
333 
334   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
335 
336   if(mat_mkl_cpardiso->nrhs > 0){
337     ierr = MatDenseGetArray(B,&barray);
338     ierr = MatDenseGetArray(X,&xarray);
339 
340     /* solve phase */
341     /*-------------*/
342     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
343     cluster_sparse_solver (
344       mat_mkl_cpardiso->pt,
345       &mat_mkl_cpardiso->maxfct,
346       &mat_mkl_cpardiso->mnum,
347       &mat_mkl_cpardiso->mtype,
348       &mat_mkl_cpardiso->phase,
349       &mat_mkl_cpardiso->n,
350       mat_mkl_cpardiso->a,
351       mat_mkl_cpardiso->ia,
352       mat_mkl_cpardiso->ja,
353       mat_mkl_cpardiso->perm,
354       &mat_mkl_cpardiso->nrhs,
355       mat_mkl_cpardiso->iparm,
356       &mat_mkl_cpardiso->msglvl,
357       (void*)barray,
358       (void*)xarray,
359       &mat_mkl_cpardiso->comm_mkl_cpardiso,
360       &mat_mkl_cpardiso->err);
361     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
362   }
363   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
364   PetscFunctionReturn(0);
365 
366 }
367 
368 /*
369  * LU Decomposition
370  */
371 #undef __FUNCT__
372 #define __FUNCT__ "MatFactorNumeric_MKL_CPARDISO"
373 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
374 {
375   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
376   PetscErrorCode   ierr;
377 
378   PetscFunctionBegin;
379   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
380   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
381 
382   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
383   cluster_sparse_solver (
384     mat_mkl_cpardiso->pt,
385     &mat_mkl_cpardiso->maxfct,
386     &mat_mkl_cpardiso->mnum,
387     &mat_mkl_cpardiso->mtype,
388     &mat_mkl_cpardiso->phase,
389     &mat_mkl_cpardiso->n,
390     mat_mkl_cpardiso->a,
391     mat_mkl_cpardiso->ia,
392     mat_mkl_cpardiso->ja,
393     mat_mkl_cpardiso->perm,
394     &mat_mkl_cpardiso->nrhs,
395     mat_mkl_cpardiso->iparm,
396     &mat_mkl_cpardiso->msglvl,
397     NULL,
398     NULL,
399     &mat_mkl_cpardiso->comm_mkl_cpardiso,
400     &mat_mkl_cpardiso->err);
401   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
402 
403   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
404   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
405   PetscFunctionReturn(0);
406 }
407 
408 /* Sets mkl_cpardiso options from the options database */
409 #undef __FUNCT__
410 #define __FUNCT__ "PetscSetMKL_CPARDISOFromOptions"
411 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
412 {
413   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
414   PetscErrorCode      ierr;
415   PetscInt            icntl;
416   PetscBool           flg;
417   int                 pt[IPARM_SIZE], threads;
418 
419   PetscFunctionBegin;
420   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
421   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr);
422   if (flg) mkl_set_num_threads(threads);
423 
424   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr);
425   if (flg) mat_mkl_cpardiso->maxfct = icntl;
426 
427   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr);
428   if (flg) mat_mkl_cpardiso->mnum = icntl;
429 
430   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr);
431   if (flg) mat_mkl_cpardiso->msglvl = icntl;
432 
433   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr);
434   if(flg){
435     mat_mkl_cpardiso->mtype = icntl;
436 #if defined(PETSC_USE_REAL_SINGLE)
437     mat_mkl_cpardiso->iparm[27] = 1;
438 #else
439     mat_mkl_cpardiso->iparm[27] = 0;
440 #endif
441     mat_mkl_cpardiso->iparm[34] = 1;
442   }
443   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr);
444 
445   if(flg && icntl != 0){
446     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr);
447     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
448 
449     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr);
450     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
451 
452     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr);
453     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
454 
455     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr);
456     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
457 
458     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr);
459     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
460 
461     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr);
462     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
463 
464     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr);
465     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
466 
467     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr);
468     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
469 
470     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
471       &flg);CHKERRQ(ierr);
472     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
473 
474     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
475       &flg);CHKERRQ(ierr);
476     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
477 
478     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr);
479     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
480 
481     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr);
482     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
483 
484     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr);
485     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
486 
487     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr);
488     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
489 
490     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr);
491     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
492 
493     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr);
494     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
495 
496     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr);
497     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
498 
499     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr);
500     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
501   }
502 
503   ierr = PetscOptionsEnd();CHKERRQ(ierr);
504   PetscFunctionReturn(0);
505 }
506 
507 #undef __FUNCT__
508 #define __FUNCT__ "PetscInitialize_MKL_CPARDISO"
509 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
510 {
511   PetscErrorCode  ierr;
512   PetscInt        i;
513   PetscMPIInt     size;
514 
515   PetscFunctionBegin;
516 
517   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
518   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
519 
520   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
521   mat_mkl_cpardiso->maxfct = 1;
522   mat_mkl_cpardiso->mnum = 1;
523   mat_mkl_cpardiso->n = A->rmap->N;
524   mat_mkl_cpardiso->msglvl = 0;
525   mat_mkl_cpardiso->nrhs = 1;
526   mat_mkl_cpardiso->err = 0;
527   mat_mkl_cpardiso->phase = -1;
528 #if defined(PETSC_USE_COMPLEX)
529   mat_mkl_cpardiso->mtype = 13;
530 #else
531   mat_mkl_cpardiso->mtype = 11;
532 #endif
533 
534 #if defined(PETSC_USE_REAL_SINGLE)
535   mat_mkl_cpardiso->iparm[27] = 1;
536 #else
537   mat_mkl_cpardiso->iparm[27] = 0;
538 #endif
539 
540   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
541 
542   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
543   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
544   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
545   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
546   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
547   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
548   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
549   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
550   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
551   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
552 
553   mat_mkl_cpardiso->iparm[39] = 0;
554   if (size > 1) {
555     mat_mkl_cpardiso->iparm[39] = 2;
556     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
557     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
558   }
559   mat_mkl_cpardiso->perm = 0;
560   PetscFunctionReturn(0);
561 }
562 
563 /*
564  * Symbolic decomposition. Mkl_Pardiso analysis phase.
565  */
566 #undef __FUNCT__
567 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_CPARDISO"
568 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
569 {
570   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
571   PetscErrorCode  ierr;
572 
573   PetscFunctionBegin;
574   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
575 
576   /* Set MKL_CPARDISO options from the options database */
577   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
578   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
579 
580   mat_mkl_cpardiso->n = A->rmap->N;
581 
582   /* analysis phase */
583   /*----------------*/
584   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
585 
586   cluster_sparse_solver (
587     mat_mkl_cpardiso->pt,
588     &mat_mkl_cpardiso->maxfct,
589     &mat_mkl_cpardiso->mnum,
590     &mat_mkl_cpardiso->mtype,
591     &mat_mkl_cpardiso->phase,
592     &mat_mkl_cpardiso->n,
593     mat_mkl_cpardiso->a,
594     mat_mkl_cpardiso->ia,
595     mat_mkl_cpardiso->ja,
596     mat_mkl_cpardiso->perm,
597     &mat_mkl_cpardiso->nrhs,
598     mat_mkl_cpardiso->iparm,
599     &mat_mkl_cpardiso->msglvl,
600     NULL,
601     NULL,
602     &mat_mkl_cpardiso->comm_mkl_cpardiso,
603     &mat_mkl_cpardiso->err);
604 
605   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
606 
607   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
608   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
609   F->ops->solve           = MatSolve_MKL_CPARDISO;
610   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
611   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
612   PetscFunctionReturn(0);
613 }
614 
615 #undef __FUNCT__
616 #define __FUNCT__ "MatView_MKL_CPARDISO"
617 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
618 {
619   PetscErrorCode    ierr;
620   PetscBool         iascii;
621   PetscViewerFormat format;
622   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
623   PetscInt          i;
624 
625   PetscFunctionBegin;
626   /* check if matrix is mkl_cpardiso type */
627   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
628 
629   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
630   if (iascii) {
631     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
632     if (format == PETSC_VIEWER_ASCII_INFO) {
633       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
634       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
635       for(i = 1; i <= 64; i++){
636         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
637       }
638       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
639       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
640       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
641       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
642       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
643       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
644     }
645   }
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatGetInfo_MKL_CPARDISO"
651 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
652 {
653   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data;
654 
655   PetscFunctionBegin;
656   info->block_size        = 1.0;
657   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
658   info->nz_unneeded       = 0.0;
659   info->assemblies        = 0.0;
660   info->mallocs           = 0.0;
661   info->memory            = 0.0;
662   info->fill_ratio_given  = 0;
663   info->fill_ratio_needed = 0;
664   info->factor_mallocs    = 0;
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNCT__
669 #define __FUNCT__ "MatMkl_CPardisoSetCntl_MKL_CPARDISO"
670 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
671 {
672   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data;
673 
674   PetscFunctionBegin;
675   if(icntl <= 64){
676     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
677   } else {
678     if(icntl == 65) mkl_set_num_threads((int)ival);
679     else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival;
680     else if(icntl == 67) mat_mkl_cpardiso->mnum = ival;
681     else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival;
682     else if(icntl == 69){
683       int pt[IPARM_SIZE];
684       mat_mkl_cpardiso->mtype = ival;
685 #if defined(PETSC_USE_REAL_SINGLE)
686       mat_mkl_cpardiso->iparm[27] = 1;
687 #else
688       mat_mkl_cpardiso->iparm[27] = 0;
689 #endif
690       mat_mkl_cpardiso->iparm[34] = 1;
691     }
692   }
693   PetscFunctionReturn(0);
694 }
695 
696 #undef __FUNCT__
697 #define __FUNCT__ "MatMkl_CPardisoSetCntl"
698 /*@
699   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
700 
701    Logically Collective on Mat
702 
703    Input Parameters:
704 +  F - the factored matrix obtained by calling MatGetFactor()
705 .  icntl - index of Mkl_Pardiso parameter
706 -  ival - value of Mkl_Pardiso parameter
707 
708   Options Database:
709 .   -mat_mkl_cpardiso_<icntl> <ival>
710 
711    Level: beginner
712 
713    References:
714 .      Mkl_Pardiso Users' Guide
715 
716 .seealso: MatGetFactor()
717 @*/
718 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
719 {
720   PetscErrorCode ierr;
721 
722   PetscFunctionBegin;
723   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_cpardiso"
729 static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type)
730 {
731   PetscFunctionBegin;
732   *type = MATSOLVERMKL_CPARDISO;
733   PetscFunctionReturn(0);
734 }
735 
736 /* MatGetFactor for MPI AIJ matrices */
737 #undef __FUNCT__
738 #define __FUNCT__ "MatGetFactor_mpiaij_mkl_cpardiso"
739 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
740 {
741   Mat              B;
742   PetscErrorCode   ierr;
743   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
744   PetscBool        isSeqAIJ;
745 
746   PetscFunctionBegin;
747   /* Create the factorization matrix */
748 
749   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
750   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
751   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
752   ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr);
753   ierr = MatSetUp(B);CHKERRQ(ierr);
754 
755   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
756 
757   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
758   else          mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
759 
760   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
761   B->ops->destroy = MatDestroy_MKL_CPARDISO;
762 
763   B->ops->view    = MatView_MKL_CPARDISO;
764   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
765 
766   B->factortype   = ftype;
767   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
768 
769   B->data = mat_mkl_cpardiso;
770 
771   /* set solvertype */
772   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
773   ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr);
774 
775   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);CHKERRQ(ierr);
776   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
777   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
778 
779   *F = B;
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatSolverPackageRegister_MKL_CPardiso"
785 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void)
786 {
787   PetscErrorCode ierr;
788 
789   PetscFunctionBegin;
790   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
791   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
792   PetscFunctionReturn(0);
793 }
794