xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision 7be5e7486ad090eef6dbc1e09e6fd40be450d1fc)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>                       /*I "petscmat.h" I*/
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
12 #define MKL_ILP64
13 #endif
14 #include <mkl.h>
15 #include <mkl_cluster_sparse_solver.h>
16 
17 /*
18  *  Possible mkl_cpardiso phases that controls the execution of the solver.
19  *  For more information check mkl_cpardiso manual.
20  */
21 #define JOB_ANALYSIS 11
22 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
23 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
24 #define JOB_NUMERICAL_FACTORIZATION 22
25 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
26 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
27 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
28 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
29 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
30 #define JOB_RELEASE_OF_LU_MEMORY 0
31 #define JOB_RELEASE_OF_ALL_MEMORY -1
32 
33 #define IPARM_SIZE 64
34 #define INT_TYPE MKL_INT
35 
36 static const char *Err_MSG_CPardiso(int errNo){
37   switch (errNo) {
38     case -1:
39       return "input inconsistent"; break;
40     case -2:
41       return "not enough memory"; break;
42     case -3:
43       return "reordering problem"; break;
44     case -4:
45       return "zero pivot, numerical factorization or iterative refinement problem"; break;
46     case -5:
47       return "unclassified (internal) error"; break;
48     case -6:
49       return "preordering failed (matrix types 11, 13 only)"; break;
50     case -7:
51       return "diagonal matrix problem"; break;
52     case -8:
53       return "32-bit integer overflow problem"; break;
54     case -9:
55       return "not enough memory for OOC"; break;
56     case -10:
57       return "problems with opening OOC temporary files"; break;
58     case -11:
59       return "read/write problems with the OOC data file"; break;
60     default :
61       return "unknown error";
62   }
63 }
64 
65 /*
66  *  Internal data structure.
67  *  For more information check mkl_cpardiso manual.
68  */
69 
70 typedef struct {
71 
72   /* Configuration vector */
73   INT_TYPE     iparm[IPARM_SIZE];
74 
75   /*
76    * Internal mkl_cpardiso memory location.
77    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
78    */
79   void         *pt[IPARM_SIZE];
80 
81   MPI_Comm     comm_mkl_cpardiso;
82 
83   /* Basic mkl_cpardiso info*/
84   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
85 
86   /* Matrix structure */
87   PetscScalar  *a;
88 
89   INT_TYPE     *ia, *ja;
90 
91   /* Number of non-zero elements */
92   INT_TYPE     nz;
93 
94   /* Row permutaton vector*/
95   INT_TYPE     *perm;
96 
97   /* Define is matrix preserve sparce structure. */
98   MatStructure matstruc;
99 
100   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
101 
102   /* True if mkl_cpardiso function have been used. */
103   PetscBool CleanUp;
104 } Mat_MKL_CPARDISO;
105 
106 /*
107  * Copy the elements of matrix A.
108  * Input:
109  *   - Mat A: MATSEQAIJ matrix
110  *   - int shift: matrix index.
111  *     - 0 for c representation
112  *     - 1 for fortran representation
113  *   - MatReuse reuse:
114  *     - MAT_INITIAL_MATRIX: Create a new aij representation
115  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
116  * Output:
117  *   - int *nnz: Number of nonzero-elements.
118  *   - int **r pointer to i index
119  *   - int **c pointer to j elements
120  *   - MATRIXTYPE **v: Non-zero elements
121  */
122 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
123 {
124   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
125 
126   PetscFunctionBegin;
127   *v=aa->a;
128   if (reuse == MAT_INITIAL_MATRIX) {
129     *r   = (INT_TYPE*)aa->i;
130     *c   = (INT_TYPE*)aa->j;
131     *nnz = aa->nz;
132   }
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
137 {
138   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
139   PetscErrorCode    ierr;
140   PetscInt          rstart,nz,i,j,countA,countB;
141   PetscInt          *row,*col;
142   const PetscScalar *av, *bv;
143   PetscScalar       *val;
144   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
145   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
146   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
147   PetscInt          colA_start,jB,jcol;
148 
149   PetscFunctionBegin;
150   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
151   av=aa->a; bv=bb->a;
152 
153   garray = mat->garray;
154 
155   if (reuse == MAT_INITIAL_MATRIX) {
156     nz   = aa->nz + bb->nz;
157     *nnz = nz;
158     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
159     col  = row + m + 1;
160     val  = (PetscScalar*)(col + nz);
161     *r = row; *c = col; *v = val;
162     row[0] = 0;
163   } else {
164     row = *r; col = *c; val = *v;
165   }
166 
167   nz = 0;
168   for (i=0; i<m; i++) {
169     row[i] = nz;
170     countA     = ai[i+1] - ai[i];
171     countB     = bi[i+1] - bi[i];
172     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
173     bjj        = bj + bi[i];
174 
175     /* B part, smaller col index */
176     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
177     jB         = 0;
178     for (j=0; j<countB; j++) {
179       jcol = garray[bjj[j]];
180       if (jcol > colA_start) {
181         jB = j;
182         break;
183       }
184       col[nz]   = jcol;
185       val[nz++] = *bv++;
186       if (j==countB-1) jB = countB;
187     }
188 
189     /* A part */
190     for (j=0; j<countA; j++) {
191       col[nz]   = rstart + ajj[j];
192       val[nz++] = *av++;
193     }
194 
195     /* B part, larger col index */
196     for (j=jB; j<countB; j++) {
197       col[nz]   = garray[bjj[j]];
198       val[nz++] = *bv++;
199     }
200   }
201   row[m] = nz;
202 
203   PetscFunctionReturn(0);
204 }
205 
206 /*
207  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
208  */
209 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
210 {
211   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
212   PetscErrorCode   ierr;
213 
214   PetscFunctionBegin;
215   /* Terminate instance, deallocate memories */
216   if (mat_mkl_cpardiso->CleanUp) {
217     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
218 
219     cluster_sparse_solver (
220       mat_mkl_cpardiso->pt,
221       &mat_mkl_cpardiso->maxfct,
222       &mat_mkl_cpardiso->mnum,
223       &mat_mkl_cpardiso->mtype,
224       &mat_mkl_cpardiso->phase,
225       &mat_mkl_cpardiso->n,
226       NULL,
227       NULL,
228       NULL,
229       mat_mkl_cpardiso->perm,
230       &mat_mkl_cpardiso->nrhs,
231       mat_mkl_cpardiso->iparm,
232       &mat_mkl_cpardiso->msglvl,
233       NULL,
234       NULL,
235       &mat_mkl_cpardiso->comm_mkl_cpardiso,
236       (int*)&mat_mkl_cpardiso->err);
237   }
238 
239   if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) {
240     ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr);
241   }
242   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
243   ierr = PetscFree(A->data);CHKERRQ(ierr);
244 
245   /* clear composed functions */
246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
247   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 /*
252  * Computes Ax = b
253  */
254 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
255 {
256   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
257   PetscErrorCode    ierr;
258   PetscScalar       *xarray;
259   const PetscScalar *barray;
260 
261   PetscFunctionBegin;
262   mat_mkl_cpardiso->nrhs = 1;
263   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
264   ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
265 
266   /* solve phase */
267   /*-------------*/
268   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
269   cluster_sparse_solver (
270     mat_mkl_cpardiso->pt,
271     &mat_mkl_cpardiso->maxfct,
272     &mat_mkl_cpardiso->mnum,
273     &mat_mkl_cpardiso->mtype,
274     &mat_mkl_cpardiso->phase,
275     &mat_mkl_cpardiso->n,
276     mat_mkl_cpardiso->a,
277     mat_mkl_cpardiso->ia,
278     mat_mkl_cpardiso->ja,
279     mat_mkl_cpardiso->perm,
280     &mat_mkl_cpardiso->nrhs,
281     mat_mkl_cpardiso->iparm,
282     &mat_mkl_cpardiso->msglvl,
283     (void*)barray,
284     (void*)xarray,
285     &mat_mkl_cpardiso->comm_mkl_cpardiso,
286     (int*)&mat_mkl_cpardiso->err);
287 
288   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
289 
290   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
291   ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
292   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
293   PetscFunctionReturn(0);
294 }
295 
296 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
297 {
298   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
299   PetscErrorCode   ierr;
300 
301   PetscFunctionBegin;
302 #if defined(PETSC_USE_COMPLEX)
303   mat_mkl_cpardiso->iparm[12 - 1] = 1;
304 #else
305   mat_mkl_cpardiso->iparm[12 - 1] = 2;
306 #endif
307   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
308   mat_mkl_cpardiso->iparm[12 - 1] = 0;
309   PetscFunctionReturn(0);
310 }
311 
312 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
313 {
314   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
315   PetscErrorCode    ierr;
316   PetscScalar       *xarray;
317   const PetscScalar *barray;
318 
319   PetscFunctionBegin;
320   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
321 
322   if(mat_mkl_cpardiso->nrhs > 0){
323     ierr = MatDenseGetArrayRead(B,&barray);
324     ierr = MatDenseGetArray(X,&xarray);
325 
326     /* solve phase */
327     /*-------------*/
328     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
329     cluster_sparse_solver (
330       mat_mkl_cpardiso->pt,
331       &mat_mkl_cpardiso->maxfct,
332       &mat_mkl_cpardiso->mnum,
333       &mat_mkl_cpardiso->mtype,
334       &mat_mkl_cpardiso->phase,
335       &mat_mkl_cpardiso->n,
336       mat_mkl_cpardiso->a,
337       mat_mkl_cpardiso->ia,
338       mat_mkl_cpardiso->ja,
339       mat_mkl_cpardiso->perm,
340       &mat_mkl_cpardiso->nrhs,
341       mat_mkl_cpardiso->iparm,
342       &mat_mkl_cpardiso->msglvl,
343       (void*)barray,
344       (void*)xarray,
345       &mat_mkl_cpardiso->comm_mkl_cpardiso,
346       (int*)&mat_mkl_cpardiso->err);
347     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
348     ierr = MatDenseRestoreArrayRead(B,&barray);
349     ierr = MatDenseRestoreArray(X,&xarray);
350 
351   }
352   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
353   PetscFunctionReturn(0);
354 
355 }
356 
357 /*
358  * LU Decomposition
359  */
360 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
361 {
362   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
363   PetscErrorCode   ierr;
364 
365   PetscFunctionBegin;
366   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
367   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
368 
369   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
370   cluster_sparse_solver (
371     mat_mkl_cpardiso->pt,
372     &mat_mkl_cpardiso->maxfct,
373     &mat_mkl_cpardiso->mnum,
374     &mat_mkl_cpardiso->mtype,
375     &mat_mkl_cpardiso->phase,
376     &mat_mkl_cpardiso->n,
377     mat_mkl_cpardiso->a,
378     mat_mkl_cpardiso->ia,
379     mat_mkl_cpardiso->ja,
380     mat_mkl_cpardiso->perm,
381     &mat_mkl_cpardiso->nrhs,
382     mat_mkl_cpardiso->iparm,
383     &mat_mkl_cpardiso->msglvl,
384     NULL,
385     NULL,
386     &mat_mkl_cpardiso->comm_mkl_cpardiso,
387     &mat_mkl_cpardiso->err);
388   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
389 
390   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
391   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
392   PetscFunctionReturn(0);
393 }
394 
395 /* Sets mkl_cpardiso options from the options database */
396 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
397 {
398   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
399   PetscErrorCode      ierr;
400   PetscInt            icntl;
401   PetscBool           flg;
402   int                 threads;
403 
404   PetscFunctionBegin;
405   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
406   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr);
407   if (flg) mkl_set_num_threads(threads);
408 
409   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr);
410   if (flg) mat_mkl_cpardiso->maxfct = icntl;
411 
412   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr);
413   if (flg) mat_mkl_cpardiso->mnum = icntl;
414 
415   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr);
416   if (flg) mat_mkl_cpardiso->msglvl = icntl;
417 
418   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr);
419   if(flg){
420     mat_mkl_cpardiso->mtype = icntl;
421 #if defined(PETSC_USE_REAL_SINGLE)
422     mat_mkl_cpardiso->iparm[27] = 1;
423 #else
424     mat_mkl_cpardiso->iparm[27] = 0;
425 #endif
426     mat_mkl_cpardiso->iparm[34] = 1;
427   }
428   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr);
429 
430   if(flg && icntl != 0){
431     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr);
432     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
433 
434     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr);
435     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
436 
437     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr);
438     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
439 
440     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr);
441     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
442 
443     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr);
444     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
445 
446     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr);
447     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
448 
449     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr);
450     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
451 
452     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr);
453     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
454 
455     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
456       &flg);CHKERRQ(ierr);
457     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
458 
459     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
460       &flg);CHKERRQ(ierr);
461     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
462 
463     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr);
464     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
465 
466     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr);
467     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
468 
469     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr);
470     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
471 
472     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr);
473     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
474 
475     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr);
476     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
477 
478     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr);
479     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
480 
481     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr);
482     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
483 
484     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr);
485     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
486   }
487 
488   ierr = PetscOptionsEnd();CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
493 {
494   PetscErrorCode  ierr;
495   PetscMPIInt     size;
496 
497   PetscFunctionBegin;
498 
499   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
500   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
501 
502   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
503   mat_mkl_cpardiso->maxfct = 1;
504   mat_mkl_cpardiso->mnum = 1;
505   mat_mkl_cpardiso->n = A->rmap->N;
506   mat_mkl_cpardiso->msglvl = 0;
507   mat_mkl_cpardiso->nrhs = 1;
508   mat_mkl_cpardiso->err = 0;
509   mat_mkl_cpardiso->phase = -1;
510 #if defined(PETSC_USE_COMPLEX)
511   mat_mkl_cpardiso->mtype = 13;
512 #else
513   mat_mkl_cpardiso->mtype = 11;
514 #endif
515 
516 #if defined(PETSC_USE_REAL_SINGLE)
517   mat_mkl_cpardiso->iparm[27] = 1;
518 #else
519   mat_mkl_cpardiso->iparm[27] = 0;
520 #endif
521 
522   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
523 
524   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
525   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
526   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
527   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
528   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
529   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
530   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
531   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
532   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
533   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
534 
535   mat_mkl_cpardiso->iparm[39] = 0;
536   if (size > 1) {
537     mat_mkl_cpardiso->iparm[39] = 2;
538     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
539     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
540   }
541   mat_mkl_cpardiso->perm = 0;
542   PetscFunctionReturn(0);
543 }
544 
545 /*
546  * Symbolic decomposition. Mkl_Pardiso analysis phase.
547  */
548 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
549 {
550   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
551   PetscErrorCode  ierr;
552 
553   PetscFunctionBegin;
554   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
555 
556   /* Set MKL_CPARDISO options from the options database */
557   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
558   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
559 
560   mat_mkl_cpardiso->n = A->rmap->N;
561 
562   /* analysis phase */
563   /*----------------*/
564   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
565 
566   cluster_sparse_solver (
567     mat_mkl_cpardiso->pt,
568     &mat_mkl_cpardiso->maxfct,
569     &mat_mkl_cpardiso->mnum,
570     &mat_mkl_cpardiso->mtype,
571     &mat_mkl_cpardiso->phase,
572     &mat_mkl_cpardiso->n,
573     mat_mkl_cpardiso->a,
574     mat_mkl_cpardiso->ia,
575     mat_mkl_cpardiso->ja,
576     mat_mkl_cpardiso->perm,
577     &mat_mkl_cpardiso->nrhs,
578     mat_mkl_cpardiso->iparm,
579     &mat_mkl_cpardiso->msglvl,
580     NULL,
581     NULL,
582     &mat_mkl_cpardiso->comm_mkl_cpardiso,
583     (int*)&mat_mkl_cpardiso->err);
584 
585   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
586 
587   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
588   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
589   F->ops->solve           = MatSolve_MKL_CPARDISO;
590   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
591   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
592   PetscFunctionReturn(0);
593 }
594 
595 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
596 {
597   PetscErrorCode    ierr;
598   PetscBool         iascii;
599   PetscViewerFormat format;
600   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
601   PetscInt          i;
602 
603   PetscFunctionBegin;
604   /* check if matrix is mkl_cpardiso type */
605   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
606 
607   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
608   if (iascii) {
609     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
610     if (format == PETSC_VIEWER_ASCII_INFO) {
611       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
612       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
613       for(i = 1; i <= 64; i++){
614         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
615       }
616       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
617       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
618       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
619       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
620       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
621       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
622     }
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
628 {
629   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data;
630 
631   PetscFunctionBegin;
632   info->block_size        = 1.0;
633   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
634   info->nz_unneeded       = 0.0;
635   info->assemblies        = 0.0;
636   info->mallocs           = 0.0;
637   info->memory            = 0.0;
638   info->fill_ratio_given  = 0;
639   info->fill_ratio_needed = 0;
640   info->factor_mallocs    = 0;
641   PetscFunctionReturn(0);
642 }
643 
644 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
645 {
646   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data;
647 
648   PetscFunctionBegin;
649   if(icntl <= 64){
650     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
651   } else {
652     if(icntl == 65) mkl_set_num_threads((int)ival);
653     else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival;
654     else if(icntl == 67) mat_mkl_cpardiso->mnum = ival;
655     else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival;
656     else if(icntl == 69){
657       mat_mkl_cpardiso->mtype = ival;
658 #if defined(PETSC_USE_REAL_SINGLE)
659       mat_mkl_cpardiso->iparm[27] = 1;
660 #else
661       mat_mkl_cpardiso->iparm[27] = 0;
662 #endif
663       mat_mkl_cpardiso->iparm[34] = 1;
664     }
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 /*@
670   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
671 
672    Logically Collective on Mat
673 
674    Input Parameters:
675 +  F - the factored matrix obtained by calling MatGetFactor()
676 .  icntl - index of Mkl_Pardiso parameter
677 -  ival - value of Mkl_Pardiso parameter
678 
679   Options Database:
680 .   -mat_mkl_cpardiso_<icntl> <ival>
681 
682    Level: Intermediate
683 
684    Notes:
685     This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options
686           database approach when working with TS, SNES, or KSP.
687 
688    References:
689 .      Mkl_Pardiso Users' Guide
690 
691 .seealso: MatGetFactor()
692 @*/
693 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
694 {
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
699   PetscFunctionReturn(0);
700 }
701 
702 static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type)
703 {
704   PetscFunctionBegin;
705   *type = MATSOLVERMKL_CPARDISO;
706   PetscFunctionReturn(0);
707 }
708 
709 /* MatGetFactor for MPI AIJ matrices */
710 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
711 {
712   Mat              B;
713   PetscErrorCode   ierr;
714   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
715   PetscBool        isSeqAIJ;
716 
717   PetscFunctionBegin;
718   /* Create the factorization matrix */
719 
720   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
721   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
722   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
723   ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr);
724   ierr = MatSetUp(B);CHKERRQ(ierr);
725 
726   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
727 
728   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
729   else          mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
730 
731   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
732   B->ops->destroy = MatDestroy_MKL_CPARDISO;
733 
734   B->ops->view    = MatView_MKL_CPARDISO;
735   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
736 
737   B->factortype   = ftype;
738   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
739 
740   B->data = mat_mkl_cpardiso;
741 
742   /* set solvertype */
743   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
744   ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr);
745 
746   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);CHKERRQ(ierr);
747   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
748   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
749 
750   *F = B;
751   PetscFunctionReturn(0);
752 }
753 
754 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void)
755 {
756   PetscErrorCode ierr;
757 
758   PetscFunctionBegin;
759   ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
760   ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
761   PetscFunctionReturn(0);
762 }
763