xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision 0fd5cbfeb66ff966a261dff1d7a98e5e5669e067)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #include <mkl.h>
12 #include <mkl_cluster_sparse_solver.h>
13 
14 /*
15  *  Possible mkl_cpardiso phases that controls the execution of the solver.
16  *  For more information check mkl_cpardiso manual.
17  */
18 #define JOB_ANALYSIS 11
19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
21 #define JOB_NUMERICAL_FACTORIZATION 22
22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
27 #define JOB_RELEASE_OF_LU_MEMORY 0
28 #define JOB_RELEASE_OF_ALL_MEMORY -1
29 
30 #define IPARM_SIZE 64
31 #define INT_TYPE MKL_INT
32 
33 static const char *Err_MSG_CPardiso(int errNo){
34   switch (errNo) {
35     case -1:
36       return "input inconsistent"; break;
37     case -2:
38       return "not enough memory"; break;
39     case -3:
40       return "reordering problem"; break;
41     case -4:
42       return "zero pivot, numerical factorization or iterative refinement problem"; break;
43     case -5:
44       return "unclassified (internal) error"; break;
45     case -6:
46       return "preordering failed (matrix types 11, 13 only)"; break;
47     case -7:
48       return "diagonal matrix problem"; break;
49     case -8:
50       return "32-bit integer overflow problem"; break;
51     case -9:
52       return "not enough memory for OOC"; break;
53     case -10:
54       return "problems with opening OOC temporary files"; break;
55     case -11:
56       return "read/write problems with the OOC data file"; break;
57     default :
58       return "unknown error";
59   }
60 }
61 
62 /*
63  *  Internal data structure.
64  *  For more information check mkl_cpardiso manual.
65  */
66 
67 typedef struct {
68 
69   /* Configuration vector */
70   INT_TYPE     iparm[IPARM_SIZE];
71 
72   /*
73    * Internal mkl_cpardiso memory location.
74    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
75    */
76   void         *pt[IPARM_SIZE];
77 
78   MPI_Comm     comm_mkl_cpardiso;
79 
80   /* Basic mkl_cpardiso info*/
81   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
82 
83   /* Matrix structure */
84   PetscScalar  *a;
85 
86   INT_TYPE     *ia, *ja;
87 
88   /* Number of non-zero elements */
89   INT_TYPE     nz;
90 
91   /* Row permutaton vector*/
92   INT_TYPE     *perm;
93 
94   /* Define is matrix preserve sparce structure. */
95   MatStructure matstruc;
96 
97   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
98 
99   /* True if mkl_cpardiso function have been used. */
100   PetscBool CleanUp;
101 } Mat_MKL_CPARDISO;
102 
103 /*
104  * Copy the elements of matrix A.
105  * Input:
106  *   - Mat A: MATSEQAIJ matrix
107  *   - int shift: matrix index.
108  *     - 0 for c representation
109  *     - 1 for fortran representation
110  *   - MatReuse reuse:
111  *     - MAT_INITIAL_MATRIX: Create a new aij representation
112  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
113  * Output:
114  *   - int *nnz: Number of nonzero-elements.
115  *   - int **r pointer to i index
116  *   - int **c pointer to j elements
117  *   - MATRIXTYPE **v: Non-zero elements
118  */
119 #undef __FUNCT__
120 #define __FUNCT__ "MatCopy_seqaij_seqaij_MKL_CPARDISO"
121 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
122 {
123   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
124 
125   PetscFunctionBegin;
126   *v=aa->a;
127   if (reuse == MAT_INITIAL_MATRIX) {
128     *r   = (INT_TYPE*)aa->i;
129     *c   = (INT_TYPE*)aa->j;
130     *nnz = aa->nz;
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO"
137 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
138 {
139   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
140   PetscErrorCode    ierr;
141   PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
142   PetscInt          *row,*col;
143   const PetscScalar *av, *bv,*v1,*v2;
144   PetscScalar       *val;
145   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
146   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
147   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
148   PetscInt          nn, colA_start,jB,jcol;
149 
150   PetscFunctionBegin;
151   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
152   av=aa->a; bv=bb->a;
153 
154   garray = mat->garray;
155 
156   if (reuse == MAT_INITIAL_MATRIX) {
157     nz   = aa->nz + bb->nz;
158     *nnz = nz;
159     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
160     col  = row + m + 1;
161     val  = (PetscScalar*)(col + nz);
162     *r = row; *c = col; *v = val;
163     row[0] = 0;
164   } else {
165     row = *r; col = *c; val = *v;
166   }
167 
168   nz = 0;
169   for (i=0; i<m; i++) {
170     row[i] = nz;
171     countA     = ai[i+1] - ai[i];
172     countB     = bi[i+1] - bi[i];
173     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
174     bjj        = bj + bi[i];
175 
176     /* B part, smaller col index */
177     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
178     jB         = 0;
179     for (j=0; j<countB; j++) {
180       jcol = garray[bjj[j]];
181       if (jcol > colA_start) {
182         jB = j;
183         break;
184       }
185       col[nz]   = jcol;
186       val[nz++] = *bv++;
187       if (j==countB-1) jB = countB;
188     }
189 
190     /* A part */
191     for (j=0; j<countA; j++) {
192       col[nz]   = rstart + ajj[j];
193       val[nz++] = *av++;
194     }
195 
196     /* B part, larger col index */
197     for (j=jB; j<countB; j++) {
198       col[nz]   = garray[bjj[j]];
199       val[nz++] = *bv++;
200     }
201   }
202   row[m] = nz;
203 
204   PetscFunctionReturn(0);
205 }
206 
207 /*
208  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
209  */
210 #undef __FUNCT__
211 #define __FUNCT__ "MatDestroy_MKL_CPARDISO"
212 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
213 {
214   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
215   PetscErrorCode   ierr;
216 
217   PetscFunctionBegin;
218   /* Terminate instance, deallocate memories */
219   if (mat_mkl_cpardiso->CleanUp) {
220     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
221 
222     cluster_sparse_solver (
223       mat_mkl_cpardiso->pt,
224       &mat_mkl_cpardiso->maxfct,
225       &mat_mkl_cpardiso->mnum,
226       &mat_mkl_cpardiso->mtype,
227       &mat_mkl_cpardiso->phase,
228       &mat_mkl_cpardiso->n,
229       NULL,
230       NULL,
231       NULL,
232       mat_mkl_cpardiso->perm,
233       &mat_mkl_cpardiso->nrhs,
234       mat_mkl_cpardiso->iparm,
235       &mat_mkl_cpardiso->msglvl,
236       NULL,
237       NULL,
238       &mat_mkl_cpardiso->comm_mkl_cpardiso,
239       &mat_mkl_cpardiso->err);
240   }
241   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
242   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
243 
244   /* clear composed functions */
245   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251  * Computes Ax = b
252  */
253 #undef __FUNCT__
254 #define __FUNCT__ "MatSolve_MKL_CPARDISO"
255 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
256 {
257   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr;
258   PetscErrorCode    ierr;
259   PetscScalar       *barray, *xarray;
260 
261   PetscFunctionBegin;
262   mat_mkl_cpardiso->nrhs = 1;
263   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
264   ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
265 
266   /* solve phase */
267   /*-------------*/
268   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
269   cluster_sparse_solver (
270     mat_mkl_cpardiso->pt,
271     &mat_mkl_cpardiso->maxfct,
272     &mat_mkl_cpardiso->mnum,
273     &mat_mkl_cpardiso->mtype,
274     &mat_mkl_cpardiso->phase,
275     &mat_mkl_cpardiso->n,
276     mat_mkl_cpardiso->a,
277     mat_mkl_cpardiso->ia,
278     mat_mkl_cpardiso->ja,
279     mat_mkl_cpardiso->perm,
280     &mat_mkl_cpardiso->nrhs,
281     mat_mkl_cpardiso->iparm,
282     &mat_mkl_cpardiso->msglvl,
283     (void*)barray,
284     (void*)xarray,
285     &mat_mkl_cpardiso->comm_mkl_cpardiso,
286     &mat_mkl_cpardiso->err);
287 
288   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
289 
290   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
291   PetscFunctionReturn(0);
292 }
293 
294 #undef __FUNCT__
295 #define __FUNCT__ "MatSolveTranspose_MKL_CPARDISO"
296 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
297 {
298   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
299   PetscErrorCode   ierr;
300 
301   PetscFunctionBegin;
302 #if defined(PETSC_USE_COMPLEX)
303   mat_mkl_cpardiso->iparm[12 - 1] = 1;
304 #else
305   mat_mkl_cpardiso->iparm[12 - 1] = 2;
306 #endif
307   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
308   mat_mkl_cpardiso->iparm[12 - 1] = 0;
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatMatSolve_MKL_CPARDISO"
314 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
315 {
316   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr;
317   PetscErrorCode    ierr;
318   PetscScalar       *barray, *xarray;
319   PetscBool         flg;
320 
321   PetscFunctionBegin;
322   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr);
323   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
324   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr);
325   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
326 
327   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
328 
329   if(mat_mkl_cpardiso->nrhs > 0){
330     ierr = MatDenseGetArray(B,&barray);
331     ierr = MatDenseGetArray(X,&xarray);
332 
333     /* solve phase */
334     /*-------------*/
335     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
336     cluster_sparse_solver (
337       mat_mkl_cpardiso->pt,
338       &mat_mkl_cpardiso->maxfct,
339       &mat_mkl_cpardiso->mnum,
340       &mat_mkl_cpardiso->mtype,
341       &mat_mkl_cpardiso->phase,
342       &mat_mkl_cpardiso->n,
343       mat_mkl_cpardiso->a,
344       mat_mkl_cpardiso->ia,
345       mat_mkl_cpardiso->ja,
346       mat_mkl_cpardiso->perm,
347       &mat_mkl_cpardiso->nrhs,
348       mat_mkl_cpardiso->iparm,
349       &mat_mkl_cpardiso->msglvl,
350       (void*)barray,
351       (void*)xarray,
352       &mat_mkl_cpardiso->comm_mkl_cpardiso,
353       &mat_mkl_cpardiso->err);
354     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
355   }
356   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
357   PetscFunctionReturn(0);
358 
359 }
360 
361 /*
362  * LU Decomposition
363  */
364 #undef __FUNCT__
365 #define __FUNCT__ "MatFactorNumeric_MKL_CPARDISO"
366 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
367 {
368   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->spptr;
369   PetscErrorCode   ierr;
370 
371   /* numerical factorization phase */
372   /*-------------------------------*/
373 
374   PetscFunctionBegin;
375   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
376   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
377 
378   /* numerical factorization phase */
379   /*-------------------------------*/
380   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
381   cluster_sparse_solver (
382     mat_mkl_cpardiso->pt,
383     &mat_mkl_cpardiso->maxfct,
384     &mat_mkl_cpardiso->mnum,
385     &mat_mkl_cpardiso->mtype,
386     &mat_mkl_cpardiso->phase,
387     &mat_mkl_cpardiso->n,
388     mat_mkl_cpardiso->a,
389     mat_mkl_cpardiso->ia,
390     mat_mkl_cpardiso->ja,
391     mat_mkl_cpardiso->perm,
392     &mat_mkl_cpardiso->nrhs,
393     mat_mkl_cpardiso->iparm,
394     &mat_mkl_cpardiso->msglvl,
395     NULL,
396     NULL,
397     &mat_mkl_cpardiso->comm_mkl_cpardiso,
398     &mat_mkl_cpardiso->err);
399   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
400 
401   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
402   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
403   PetscFunctionReturn(0);
404 }
405 
406 /* Sets mkl_cpardiso options from the options database */
407 #undef __FUNCT__
408 #define __FUNCT__ "PetscSetMKL_CPARDISOFromOptions"
409 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
410 {
411   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr;
412   PetscErrorCode      ierr;
413   PetscInt            icntl;
414   PetscBool           flg;
415   int                 pt[IPARM_SIZE], threads;
416 
417   PetscFunctionBegin;
418   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
419   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65",
420     "Number of thrads to use",
421     "None",
422     threads,
423     &threads,
424     &flg);CHKERRQ(ierr);
425   if (flg) mkl_set_num_threads(threads);
426 
427   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66",
428     "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time",
429     "None",
430      mat_mkl_cpardiso->maxfct,
431     &icntl,
432     &flg);CHKERRQ(ierr);
433   if (flg) mat_mkl_cpardiso->maxfct = icntl;
434 
435   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67",
436     "Indicates the actual matrix for the solution phase",
437     "None",
438     mat_mkl_cpardiso->mnum,
439     &icntl,
440     &flg);CHKERRQ(ierr);
441   if (flg) mat_mkl_cpardiso->mnum = icntl;
442 
443   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68",
444     "Message level information",
445     "None",
446     mat_mkl_cpardiso->msglvl,
447     &icntl,
448     &flg);CHKERRQ(ierr);
449   if (flg) mat_mkl_cpardiso->msglvl = icntl;
450 
451   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69",
452     "Defines the matrix type",
453     "None",
454     mat_mkl_cpardiso->mtype,
455     &icntl,
456     &flg);CHKERRQ(ierr);
457   if(flg){
458     mat_mkl_cpardiso->mtype = icntl;
459 #if defined(PETSC_USE_REAL_SINGLE)
460     mat_mkl_cpardiso->iparm[27] = 1;
461 #else
462     mat_mkl_cpardiso->iparm[27] = 0;
463 #endif
464     mat_mkl_cpardiso->iparm[34] = 1;
465   }
466   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1",
467     "Use default values",
468     "None",
469     mat_mkl_cpardiso->iparm[0],
470     &icntl,
471     &flg);CHKERRQ(ierr);
472 
473   if(flg && icntl != 0){
474     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2",
475       "Fill-in reducing ordering for the input matrix",
476       "None",
477       mat_mkl_cpardiso->iparm[1],
478       &icntl,
479       &flg);CHKERRQ(ierr);
480     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
481 
482     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4",
483       "Preconditioned CGS/CG",
484       "None",
485       mat_mkl_cpardiso->iparm[3],
486       &icntl,
487       &flg);CHKERRQ(ierr);
488     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
489 
490     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5",
491       "User permutation",
492       "None",
493       mat_mkl_cpardiso->iparm[4],
494       &icntl,
495       &flg);CHKERRQ(ierr);
496     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
497 
498     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6",
499       "Write solution on x",
500       "None",
501       mat_mkl_cpardiso->iparm[5],
502       &icntl,
503       &flg);CHKERRQ(ierr);
504     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
505 
506     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8",
507       "Iterative refinement step",
508       "None",
509       mat_mkl_cpardiso->iparm[7],
510       &icntl,
511       &flg);CHKERRQ(ierr);
512     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
513 
514     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10",
515       "Pivoting perturbation",
516       "None",
517       mat_mkl_cpardiso->iparm[9],
518       &icntl,
519       &flg);CHKERRQ(ierr);
520     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
521 
522     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11",
523       "Scaling vectors",
524       "None",
525       mat_mkl_cpardiso->iparm[10],
526       &icntl,
527       &flg);CHKERRQ(ierr);
528     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
529 
530     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12",
531       "Solve with transposed or conjugate transposed matrix A",
532       "None",
533       mat_mkl_cpardiso->iparm[11],
534       &icntl,
535       &flg);CHKERRQ(ierr);
536     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
537 
538     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13",
539       "Improved accuracy using (non-) symmetric weighted matching",
540       "None",
541       mat_mkl_cpardiso->iparm[12],
542       &icntl,
543       &flg);CHKERRQ(ierr);
544     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
545 
546     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18",
547       "Numbers of non-zero elements",
548       "None",
549       mat_mkl_cpardiso->iparm[17],
550       &icntl,
551       &flg);CHKERRQ(ierr);
552     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
553 
554     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19",
555       "Report number of floating point operations",
556       "None",
557       mat_mkl_cpardiso->iparm[18],
558       &icntl,
559       &flg);CHKERRQ(ierr);
560     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
561 
562     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21",
563       "Pivoting for symmetric indefinite matrices",
564       "None",
565       mat_mkl_cpardiso->iparm[20],
566       &icntl,
567       &flg);CHKERRQ(ierr);
568     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
569 
570     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24",
571       "Parallel factorization control",
572       "None",
573       mat_mkl_cpardiso->iparm[23],
574       &icntl,
575       &flg);CHKERRQ(ierr);
576     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
577 
578     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25",
579       "Parallel forward/backward solve control",
580       "None",
581       mat_mkl_cpardiso->iparm[24],
582       &icntl,
583       &flg);CHKERRQ(ierr);
584     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
585 
586     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27",
587       "Matrix checker",
588       "None",
589       mat_mkl_cpardiso->iparm[26],
590       &icntl,
591       &flg);CHKERRQ(ierr);
592     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
593 
594     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31",
595       "Partial solve and computing selected components of the solution vectors",
596       "None",
597       mat_mkl_cpardiso->iparm[30],
598       &icntl,
599       &flg);CHKERRQ(ierr);
600     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
601 
602     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34",
603       "Optimal number of threads for conditional numerical reproducibility (CNR) mode",
604       "None",
605       mat_mkl_cpardiso->iparm[33],
606       &icntl,
607       &flg);CHKERRQ(ierr);
608     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
609 
610     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60",
611       "Intel MKL_CPARDISO mode",
612       "None",
613       mat_mkl_cpardiso->iparm[59],
614       &icntl,
615       &flg);CHKERRQ(ierr);
616     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
617   }
618 
619   PetscOptionsEnd();
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "PetscInitialize_MKL_CPARDISO"
625 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
626 {
627   PetscErrorCode  ierr;
628   PetscInt        i;
629   PetscMPIInt     size;
630 
631   PetscFunctionBegin;
632 
633   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
634   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
635 
636   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
637   mat_mkl_cpardiso->maxfct = 1;
638   mat_mkl_cpardiso->mnum = 1;
639   mat_mkl_cpardiso->n = A->rmap->N;
640   mat_mkl_cpardiso->msglvl = 0;
641   mat_mkl_cpardiso->nrhs = 1;
642   mat_mkl_cpardiso->err = 0;
643   mat_mkl_cpardiso->phase = -1;
644 #if defined(PETSC_USE_COMPLEX)
645   mat_mkl_cpardiso->mtype = 13;
646 #else
647   mat_mkl_cpardiso->mtype = 11;
648 #endif
649 
650 #if defined(PETSC_USE_REAL_SINGLE)
651   mat_mkl_cpardiso->iparm[27] = 1;
652 #else
653   mat_mkl_cpardiso->iparm[27] = 0;
654 #endif
655 
656   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
657 
658   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
659   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
660   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
661   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
662   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
663   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
664   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
665   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
666   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
667   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
668 
669   mat_mkl_cpardiso->iparm[39] = 0;
670   if (size > 1) {
671     mat_mkl_cpardiso->iparm[39] = 2;
672     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
673     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
674   }
675   mat_mkl_cpardiso->perm = 0;
676   PetscFunctionReturn(0);
677 }
678 
679 /*
680  * Symbolic decomposition. Mkl_Pardiso analysis phase.
681  */
682 #undef __FUNCT__
683 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_CPARDISO"
684 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
685 {
686   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr;
687   PetscErrorCode  ierr;
688 
689   PetscFunctionBegin;
690   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
691 
692   /* Set MKL_CPARDISO options from the options database */
693   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
694 
695   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
696 
697   mat_mkl_cpardiso->n = A->rmap->N;
698 
699   /* analysis phase */
700   /*----------------*/
701   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
702 
703   cluster_sparse_solver (
704     mat_mkl_cpardiso->pt,
705     &mat_mkl_cpardiso->maxfct,
706     &mat_mkl_cpardiso->mnum,
707     &mat_mkl_cpardiso->mtype,
708     &mat_mkl_cpardiso->phase,
709     &mat_mkl_cpardiso->n,
710     mat_mkl_cpardiso->a,
711     mat_mkl_cpardiso->ia,
712     mat_mkl_cpardiso->ja,
713     mat_mkl_cpardiso->perm,
714     &mat_mkl_cpardiso->nrhs,
715     mat_mkl_cpardiso->iparm,
716     &mat_mkl_cpardiso->msglvl,
717     NULL,
718     NULL,
719     &mat_mkl_cpardiso->comm_mkl_cpardiso,
720     &mat_mkl_cpardiso->err);
721 
722   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
723 
724   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
725   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
726   F->ops->solve           = MatSolve_MKL_CPARDISO;
727   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
728   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatView_MKL_CPARDISO"
734 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
735 {
736   PetscErrorCode    ierr;
737   PetscBool         iascii;
738   PetscViewerFormat format;
739   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
740   PetscInt          i;
741 
742   PetscFunctionBegin;
743   /* check if matrix is mkl_cpardiso type */
744   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
745 
746   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
747   if (iascii) {
748     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
749     if (format == PETSC_VIEWER_ASCII_INFO) {
750       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
751       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
752       for(i = 1; i <= 64; i++){
753         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
754       }
755       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
756       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
757       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
758       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
759       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
760       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
761     }
762   }
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "MatGetInfo_MKL_CPARDISO"
768 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
769 {
770   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->spptr;
771 
772   PetscFunctionBegin;
773   info->block_size        = 1.0;
774   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
775   info->nz_unneeded       = 0.0;
776   info->assemblies        = 0.0;
777   info->mallocs           = 0.0;
778   info->memory            = 0.0;
779   info->fill_ratio_given  = 0;
780   info->fill_ratio_needed = 0;
781   info->factor_mallocs    = 0;
782   PetscFunctionReturn(0);
783 }
784 
785 #undef __FUNCT__
786 #define __FUNCT__ "MatMkl_CPardisoSetCntl_MKL_CPARDISO"
787 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
788 {
789   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->spptr;
790 
791   PetscFunctionBegin;
792   if(icntl <= 64){
793     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
794   } else {
795     if(icntl == 65)
796       mkl_set_num_threads((int)ival);
797     else if(icntl == 66)
798       mat_mkl_cpardiso->maxfct = ival;
799     else if(icntl == 67)
800       mat_mkl_cpardiso->mnum = ival;
801     else if(icntl == 68)
802       mat_mkl_cpardiso->msglvl = ival;
803     else if(icntl == 69){
804       int pt[IPARM_SIZE];
805       mat_mkl_cpardiso->mtype = ival;
806 #if defined(PETSC_USE_REAL_SINGLE)
807       mat_mkl_cpardiso->iparm[27] = 1;
808 #else
809       mat_mkl_cpardiso->iparm[27] = 0;
810 #endif
811       mat_mkl_cpardiso->iparm[34] = 1;
812     }
813   }
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNCT__
818 #define __FUNCT__ "MatMkl_CPardisoSetCntl"
819 /*@
820   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
821 
822    Logically Collective on Mat
823 
824    Input Parameters:
825 +  F - the factored matrix obtained by calling MatGetFactor()
826 .  icntl - index of Mkl_Pardiso parameter
827 -  ival - value of Mkl_Pardiso parameter
828 
829   Options Database:
830 .   -mat_mkl_cpardiso_<icntl> <ival>
831 
832    Level: beginner
833 
834    References: Mkl_Pardiso Users' Guide
835 
836 .seealso: MatGetFactor()
837 @*/
838 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
839 {
840   PetscErrorCode ierr;
841 
842   PetscFunctionBegin;
843   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
844   PetscFunctionReturn(0);
845 }
846 
847 #undef __FUNCT__
848 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_cpardiso"
849 static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type)
850 {
851   PetscFunctionBegin;
852   *type = MATSOLVERMKL_CPARDISO;
853   PetscFunctionReturn(0);
854 }
855 
856 /* MatGetFactor for MPI AIJ matrices */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatGetFactor_mpiaij_mkl_cpardiso"
859 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
860 {
861   Mat              B;
862   PetscErrorCode   ierr;
863   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
864   PetscBool        isSeqAIJ;
865 
866   PetscFunctionBegin;
867   /* Create the factorization matrix */
868 
869   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
870   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
871   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
872   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
873 
874   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
875 
876   if (isSeqAIJ) {
877     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
878   mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
879   } else {
880     mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
881     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
882   }
883 
884   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
885   B->ops->destroy = MatDestroy_MKL_CPARDISO;
886 
887   B->ops->view    = MatView_MKL_CPARDISO;
888   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
889 
890   B->factortype   = ftype;
891   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
892 
893   B->spptr = mat_mkl_cpardiso;
894 
895   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);CHKERRQ(ierr);
896   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
897   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
898 
899   *F = B;
900   PetscFunctionReturn(0);
901 }
902 
903 #undef __FUNCT__
904 #define __FUNCT__ "MatSolverPackageRegister_MKL_CPardiso"
905 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void)
906 {
907   PetscErrorCode ierr;
908 
909   PetscFunctionBegin;
910   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
911   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
912   PetscFunctionReturn(0);
913 }
914