xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision c688d0420b4e513ff34944d1e1ad7d4e50aafa8d)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>                       /*I "petscmat.h" I*/
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #include <mkl.h>
12 #include <mkl_cluster_sparse_solver.h>
13 
14 /*
15  *  Possible mkl_cpardiso phases that controls the execution of the solver.
16  *  For more information check mkl_cpardiso manual.
17  */
18 #define JOB_ANALYSIS 11
19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
21 #define JOB_NUMERICAL_FACTORIZATION 22
22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
27 #define JOB_RELEASE_OF_LU_MEMORY 0
28 #define JOB_RELEASE_OF_ALL_MEMORY -1
29 
30 #define IPARM_SIZE 64
31 #define INT_TYPE MKL_INT
32 
33 static const char *Err_MSG_CPardiso(int errNo){
34   switch (errNo) {
35     case -1:
36       return "input inconsistent"; break;
37     case -2:
38       return "not enough memory"; break;
39     case -3:
40       return "reordering problem"; break;
41     case -4:
42       return "zero pivot, numerical factorization or iterative refinement problem"; break;
43     case -5:
44       return "unclassified (internal) error"; break;
45     case -6:
46       return "preordering failed (matrix types 11, 13 only)"; break;
47     case -7:
48       return "diagonal matrix problem"; break;
49     case -8:
50       return "32-bit integer overflow problem"; break;
51     case -9:
52       return "not enough memory for OOC"; break;
53     case -10:
54       return "problems with opening OOC temporary files"; break;
55     case -11:
56       return "read/write problems with the OOC data file"; break;
57     default :
58       return "unknown error";
59   }
60 }
61 
62 /*
63  *  Internal data structure.
64  *  For more information check mkl_cpardiso manual.
65  */
66 
67 typedef struct {
68 
69   /* Configuration vector */
70   INT_TYPE     iparm[IPARM_SIZE];
71 
72   /*
73    * Internal mkl_cpardiso memory location.
74    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
75    */
76   void         *pt[IPARM_SIZE];
77 
78   MPI_Comm     comm_mkl_cpardiso;
79 
80   /* Basic mkl_cpardiso info*/
81   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
82 
83   /* Matrix structure */
84   PetscScalar  *a;
85 
86   INT_TYPE     *ia, *ja;
87 
88   /* Number of non-zero elements */
89   INT_TYPE     nz;
90 
91   /* Row permutaton vector*/
92   INT_TYPE     *perm;
93 
94   /* Define is matrix preserve sparce structure. */
95   MatStructure matstruc;
96 
97   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
98 
99   /* True if mkl_cpardiso function have been used. */
100   PetscBool CleanUp;
101 } Mat_MKL_CPARDISO;
102 
103 /*
104  * Copy the elements of matrix A.
105  * Input:
106  *   - Mat A: MATSEQAIJ matrix
107  *   - int shift: matrix index.
108  *     - 0 for c representation
109  *     - 1 for fortran representation
110  *   - MatReuse reuse:
111  *     - MAT_INITIAL_MATRIX: Create a new aij representation
112  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
113  * Output:
114  *   - int *nnz: Number of nonzero-elements.
115  *   - int **r pointer to i index
116  *   - int **c pointer to j elements
117  *   - MATRIXTYPE **v: Non-zero elements
118  */
119 #undef __FUNCT__
120 #define __FUNCT__ "MatCopy_seqaij_seqaij_MKL_CPARDISO"
121 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
122 {
123   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
124 
125   PetscFunctionBegin;
126   *v=aa->a;
127   if (reuse == MAT_INITIAL_MATRIX) {
128     *r   = (INT_TYPE*)aa->i;
129     *c   = (INT_TYPE*)aa->j;
130     *nnz = aa->nz;
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO"
137 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
138 {
139   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
140   PetscErrorCode    ierr;
141   PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
142   PetscInt          *row,*col;
143   const PetscScalar *av, *bv,*v1,*v2;
144   PetscScalar       *val;
145   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
146   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
147   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
148   PetscInt          nn, colA_start,jB,jcol;
149 
150   PetscFunctionBegin;
151   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
152   av=aa->a; bv=bb->a;
153 
154   garray = mat->garray;
155 
156   if (reuse == MAT_INITIAL_MATRIX) {
157     nz   = aa->nz + bb->nz;
158     *nnz = nz;
159     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
160     col  = row + m + 1;
161     val  = (PetscScalar*)(col + nz);
162     *r = row; *c = col; *v = val;
163     row[0] = 0;
164   } else {
165     row = *r; col = *c; val = *v;
166   }
167 
168   nz = 0;
169   for (i=0; i<m; i++) {
170     row[i] = nz;
171     countA     = ai[i+1] - ai[i];
172     countB     = bi[i+1] - bi[i];
173     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
174     bjj        = bj + bi[i];
175 
176     /* B part, smaller col index */
177     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
178     jB         = 0;
179     for (j=0; j<countB; j++) {
180       jcol = garray[bjj[j]];
181       if (jcol > colA_start) {
182         jB = j;
183         break;
184       }
185       col[nz]   = jcol;
186       val[nz++] = *bv++;
187       if (j==countB-1) jB = countB;
188     }
189 
190     /* A part */
191     for (j=0; j<countA; j++) {
192       col[nz]   = rstart + ajj[j];
193       val[nz++] = *av++;
194     }
195 
196     /* B part, larger col index */
197     for (j=jB; j<countB; j++) {
198       col[nz]   = garray[bjj[j]];
199       val[nz++] = *bv++;
200     }
201   }
202   row[m] = nz;
203 
204   PetscFunctionReturn(0);
205 }
206 
207 /*
208  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
209  */
210 #undef __FUNCT__
211 #define __FUNCT__ "MatDestroy_MKL_CPARDISO"
212 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
213 {
214   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
215   PetscErrorCode   ierr;
216 
217   PetscFunctionBegin;
218   /* Terminate instance, deallocate memories */
219   if (mat_mkl_cpardiso->CleanUp) {
220     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
221 
222     cluster_sparse_solver (
223       mat_mkl_cpardiso->pt,
224       &mat_mkl_cpardiso->maxfct,
225       &mat_mkl_cpardiso->mnum,
226       &mat_mkl_cpardiso->mtype,
227       &mat_mkl_cpardiso->phase,
228       &mat_mkl_cpardiso->n,
229       NULL,
230       NULL,
231       NULL,
232       mat_mkl_cpardiso->perm,
233       &mat_mkl_cpardiso->nrhs,
234       mat_mkl_cpardiso->iparm,
235       &mat_mkl_cpardiso->msglvl,
236       NULL,
237       NULL,
238       &mat_mkl_cpardiso->comm_mkl_cpardiso,
239       &mat_mkl_cpardiso->err);
240   }
241 
242   if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) {
243     ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr);
244   }
245   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
246   ierr = PetscFree(A->data);CHKERRQ(ierr);
247 
248   /* clear composed functions */
249   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
250   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 /*
255  * Computes Ax = b
256  */
257 #undef __FUNCT__
258 #define __FUNCT__ "MatSolve_MKL_CPARDISO"
259 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
260 {
261   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
262   PetscErrorCode    ierr;
263   PetscScalar       *xarray;
264   const PetscScalar *barray;
265 
266   PetscFunctionBegin;
267   mat_mkl_cpardiso->nrhs = 1;
268   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
269   ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
270 
271   /* solve phase */
272   /*-------------*/
273   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
274   cluster_sparse_solver (
275     mat_mkl_cpardiso->pt,
276     &mat_mkl_cpardiso->maxfct,
277     &mat_mkl_cpardiso->mnum,
278     &mat_mkl_cpardiso->mtype,
279     &mat_mkl_cpardiso->phase,
280     &mat_mkl_cpardiso->n,
281     mat_mkl_cpardiso->a,
282     mat_mkl_cpardiso->ia,
283     mat_mkl_cpardiso->ja,
284     mat_mkl_cpardiso->perm,
285     &mat_mkl_cpardiso->nrhs,
286     mat_mkl_cpardiso->iparm,
287     &mat_mkl_cpardiso->msglvl,
288     (void*)barray,
289     (void*)xarray,
290     &mat_mkl_cpardiso->comm_mkl_cpardiso,
291     &mat_mkl_cpardiso->err);
292 
293   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
294 
295   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
296   ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
297   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSolveTranspose_MKL_CPARDISO"
303 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
304 {
305   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
306   PetscErrorCode   ierr;
307 
308   PetscFunctionBegin;
309 #if defined(PETSC_USE_COMPLEX)
310   mat_mkl_cpardiso->iparm[12 - 1] = 1;
311 #else
312   mat_mkl_cpardiso->iparm[12 - 1] = 2;
313 #endif
314   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
315   mat_mkl_cpardiso->iparm[12 - 1] = 0;
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNCT__
320 #define __FUNCT__ "MatMatSolve_MKL_CPARDISO"
321 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
322 {
323   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
324   PetscErrorCode    ierr;
325   PetscScalar       *barray, *xarray;
326   PetscBool         flg;
327 
328   PetscFunctionBegin;
329   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr);
330   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
331   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr);
332   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
333 
334   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
335 
336   if(mat_mkl_cpardiso->nrhs > 0){
337     ierr = MatDenseGetArray(B,&barray);
338     ierr = MatDenseGetArray(X,&xarray);
339 
340     /* solve phase */
341     /*-------------*/
342     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
343     cluster_sparse_solver (
344       mat_mkl_cpardiso->pt,
345       &mat_mkl_cpardiso->maxfct,
346       &mat_mkl_cpardiso->mnum,
347       &mat_mkl_cpardiso->mtype,
348       &mat_mkl_cpardiso->phase,
349       &mat_mkl_cpardiso->n,
350       mat_mkl_cpardiso->a,
351       mat_mkl_cpardiso->ia,
352       mat_mkl_cpardiso->ja,
353       mat_mkl_cpardiso->perm,
354       &mat_mkl_cpardiso->nrhs,
355       mat_mkl_cpardiso->iparm,
356       &mat_mkl_cpardiso->msglvl,
357       (void*)barray,
358       (void*)xarray,
359       &mat_mkl_cpardiso->comm_mkl_cpardiso,
360       &mat_mkl_cpardiso->err);
361     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
362   }
363   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
364   PetscFunctionReturn(0);
365 
366 }
367 
368 /*
369  * LU Decomposition
370  */
371 #undef __FUNCT__
372 #define __FUNCT__ "MatFactorNumeric_MKL_CPARDISO"
373 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
374 {
375   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
376   PetscErrorCode   ierr;
377 
378   PetscFunctionBegin;
379   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
380   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
381 
382   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
383   cluster_sparse_solver (
384     mat_mkl_cpardiso->pt,
385     &mat_mkl_cpardiso->maxfct,
386     &mat_mkl_cpardiso->mnum,
387     &mat_mkl_cpardiso->mtype,
388     &mat_mkl_cpardiso->phase,
389     &mat_mkl_cpardiso->n,
390     mat_mkl_cpardiso->a,
391     mat_mkl_cpardiso->ia,
392     mat_mkl_cpardiso->ja,
393     mat_mkl_cpardiso->perm,
394     &mat_mkl_cpardiso->nrhs,
395     mat_mkl_cpardiso->iparm,
396     &mat_mkl_cpardiso->msglvl,
397     NULL,
398     NULL,
399     &mat_mkl_cpardiso->comm_mkl_cpardiso,
400     &mat_mkl_cpardiso->err);
401   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
402 
403   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
404   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
405   PetscFunctionReturn(0);
406 }
407 
408 /* Sets mkl_cpardiso options from the options database */
409 #undef __FUNCT__
410 #define __FUNCT__ "PetscSetMKL_CPARDISOFromOptions"
411 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
412 {
413   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
414   PetscErrorCode      ierr;
415   PetscInt            icntl;
416   PetscBool           flg;
417   int                 pt[IPARM_SIZE], threads;
418 
419   PetscFunctionBegin;
420   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
421   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr);
422   if (flg) mkl_set_num_threads(threads);
423 
424   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr);
425   if (flg) mat_mkl_cpardiso->maxfct = icntl;
426 
427   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr);
428   if (flg) mat_mkl_cpardiso->mnum = icntl;
429 
430   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr);
431   if (flg) mat_mkl_cpardiso->msglvl = icntl;
432 
433   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr);
434   if(flg){
435     mat_mkl_cpardiso->mtype = icntl;
436 #if defined(PETSC_USE_REAL_SINGLE)
437     mat_mkl_cpardiso->iparm[27] = 1;
438 #else
439     mat_mkl_cpardiso->iparm[27] = 0;
440 #endif
441     mat_mkl_cpardiso->iparm[34] = 1;
442   }
443   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr);
444 
445   if(flg && icntl != 0){
446     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr);
447     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
448 
449     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr);
450     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
451 
452     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr);
453     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
454 
455     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr);
456     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
457 
458     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr);
459     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
460 
461     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr);
462     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
463 
464     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr);
465     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
466 
467     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr);
468     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
469 
470     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
471       &flg);CHKERRQ(ierr);
472     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
473 
474     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
475       &flg);CHKERRQ(ierr);
476     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
477 
478     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr);
479     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
480 
481     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr);
482     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
483 
484     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr);
485     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
486 
487     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr);
488     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
489 
490     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr);
491     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
492 
493     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr);
494     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
495 
496     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr);
497     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
498 
499     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr);
500     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
501   }
502 
503   ierr = PetscOptionsEnd();CHKERRQ(ierr);
504   PetscFunctionReturn(0);
505 }
506 
507 #undef __FUNCT__
508 #define __FUNCT__ "PetscInitialize_MKL_CPARDISO"
509 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
510 {
511   PetscErrorCode  ierr;
512   PetscInt        i;
513   PetscMPIInt     size;
514 
515   PetscFunctionBegin;
516 
517   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
518   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
519 
520   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
521   mat_mkl_cpardiso->maxfct = 1;
522   mat_mkl_cpardiso->mnum = 1;
523   mat_mkl_cpardiso->n = A->rmap->N;
524   mat_mkl_cpardiso->msglvl = 0;
525   mat_mkl_cpardiso->nrhs = 1;
526   mat_mkl_cpardiso->err = 0;
527   mat_mkl_cpardiso->phase = -1;
528 #if defined(PETSC_USE_COMPLEX)
529   mat_mkl_cpardiso->mtype = 13;
530 #else
531   mat_mkl_cpardiso->mtype = 11;
532 #endif
533 
534 #if defined(PETSC_USE_REAL_SINGLE)
535   mat_mkl_cpardiso->iparm[27] = 1;
536 #else
537   mat_mkl_cpardiso->iparm[27] = 0;
538 #endif
539 
540   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
541 
542   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
543   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
544   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
545   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
546   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
547   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
548   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
549   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
550   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
551   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
552 
553   mat_mkl_cpardiso->iparm[39] = 0;
554   if (size > 1) {
555     mat_mkl_cpardiso->iparm[39] = 2;
556     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
557     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
558   }
559   mat_mkl_cpardiso->perm = 0;
560   PetscFunctionReturn(0);
561 }
562 
563 /*
564  * Symbolic decomposition. Mkl_Pardiso analysis phase.
565  */
566 #undef __FUNCT__
567 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_CPARDISO"
568 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
569 {
570   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
571   PetscErrorCode  ierr;
572 
573   PetscFunctionBegin;
574   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
575 
576   /* Set MKL_CPARDISO options from the options database */
577   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
578   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
579 
580   mat_mkl_cpardiso->n = A->rmap->N;
581 
582   /* analysis phase */
583   /*----------------*/
584   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
585 
586   cluster_sparse_solver (
587     mat_mkl_cpardiso->pt,
588     &mat_mkl_cpardiso->maxfct,
589     &mat_mkl_cpardiso->mnum,
590     &mat_mkl_cpardiso->mtype,
591     &mat_mkl_cpardiso->phase,
592     &mat_mkl_cpardiso->n,
593     mat_mkl_cpardiso->a,
594     mat_mkl_cpardiso->ia,
595     mat_mkl_cpardiso->ja,
596     mat_mkl_cpardiso->perm,
597     &mat_mkl_cpardiso->nrhs,
598     mat_mkl_cpardiso->iparm,
599     &mat_mkl_cpardiso->msglvl,
600     NULL,
601     NULL,
602     &mat_mkl_cpardiso->comm_mkl_cpardiso,
603     &mat_mkl_cpardiso->err);
604 
605   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
606 
607   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
608   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
609   F->ops->solve           = MatSolve_MKL_CPARDISO;
610   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
611   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
612   PetscFunctionReturn(0);
613 }
614 
615 #undef __FUNCT__
616 #define __FUNCT__ "MatView_MKL_CPARDISO"
617 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
618 {
619   PetscErrorCode    ierr;
620   PetscBool         iascii;
621   PetscViewerFormat format;
622   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
623   PetscInt          i;
624 
625   PetscFunctionBegin;
626   /* check if matrix is mkl_cpardiso type */
627   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
628 
629   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
630   if (iascii) {
631     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
632     if (format == PETSC_VIEWER_ASCII_INFO) {
633       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
634       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
635       for(i = 1; i <= 64; i++){
636         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
637       }
638       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
639       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
640       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
641       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
642       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
643       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
644     }
645   }
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatGetInfo_MKL_CPARDISO"
651 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
652 {
653   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data;
654 
655   PetscFunctionBegin;
656   info->block_size        = 1.0;
657   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
658   info->nz_unneeded       = 0.0;
659   info->assemblies        = 0.0;
660   info->mallocs           = 0.0;
661   info->memory            = 0.0;
662   info->fill_ratio_given  = 0;
663   info->fill_ratio_needed = 0;
664   info->factor_mallocs    = 0;
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNCT__
669 #define __FUNCT__ "MatMkl_CPardisoSetCntl_MKL_CPARDISO"
670 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
671 {
672   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data;
673 
674   PetscFunctionBegin;
675   if(icntl <= 64){
676     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
677   } else {
678     if(icntl == 65) mkl_set_num_threads((int)ival);
679     else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival;
680     else if(icntl == 67) mat_mkl_cpardiso->mnum = ival;
681     else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival;
682     else if(icntl == 69){
683       int pt[IPARM_SIZE];
684       mat_mkl_cpardiso->mtype = ival;
685 #if defined(PETSC_USE_REAL_SINGLE)
686       mat_mkl_cpardiso->iparm[27] = 1;
687 #else
688       mat_mkl_cpardiso->iparm[27] = 0;
689 #endif
690       mat_mkl_cpardiso->iparm[34] = 1;
691     }
692   }
693   PetscFunctionReturn(0);
694 }
695 
696 #undef __FUNCT__
697 #define __FUNCT__ "MatMkl_CPardisoSetCntl"
698 /*@
699   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
700 
701    Logically Collective on Mat
702 
703    Input Parameters:
704 +  F - the factored matrix obtained by calling MatGetFactor()
705 .  icntl - index of Mkl_Pardiso parameter
706 -  ival - value of Mkl_Pardiso parameter
707 
708   Options Database:
709 .   -mat_mkl_cpardiso_<icntl> <ival>
710 
711    Level: Intermediate
712 
713    Notes: This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options
714           database approach when working with TS, SNES, or KSP.
715 
716    References:
717 .      Mkl_Pardiso Users' Guide
718 
719 .seealso: MatGetFactor()
720 @*/
721 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
722 {
723   PetscErrorCode ierr;
724 
725   PetscFunctionBegin;
726   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_cpardiso"
732 static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type)
733 {
734   PetscFunctionBegin;
735   *type = MATSOLVERMKL_CPARDISO;
736   PetscFunctionReturn(0);
737 }
738 
739 /* MatGetFactor for MPI AIJ matrices */
740 #undef __FUNCT__
741 #define __FUNCT__ "MatGetFactor_mpiaij_mkl_cpardiso"
742 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
743 {
744   Mat              B;
745   PetscErrorCode   ierr;
746   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
747   PetscBool        isSeqAIJ;
748 
749   PetscFunctionBegin;
750   /* Create the factorization matrix */
751 
752   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
753   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
754   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
755   ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr);
756   ierr = MatSetUp(B);CHKERRQ(ierr);
757 
758   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
759 
760   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
761   else          mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
762 
763   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
764   B->ops->destroy = MatDestroy_MKL_CPARDISO;
765 
766   B->ops->view    = MatView_MKL_CPARDISO;
767   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
768 
769   B->factortype   = ftype;
770   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
771 
772   B->data = mat_mkl_cpardiso;
773 
774   /* set solvertype */
775   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
776   ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr);
777 
778   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);CHKERRQ(ierr);
779   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
780   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
781 
782   *F = B;
783   PetscFunctionReturn(0);
784 }
785 
786 #undef __FUNCT__
787 #define __FUNCT__ "MatSolverPackageRegister_MKL_CPardiso"
788 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void)
789 {
790   PetscErrorCode ierr;
791 
792   PetscFunctionBegin;
793   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
794   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797