xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision e7f46db8d62cb2e4e59111bf21061e64ea60daab)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>                       /*I "petscmat.h" I*/
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #include <mkl.h>
12 #include <mkl_cluster_sparse_solver.h>
13 
14 /*
15  *  Possible mkl_cpardiso phases that controls the execution of the solver.
16  *  For more information check mkl_cpardiso manual.
17  */
18 #define JOB_ANALYSIS 11
19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
21 #define JOB_NUMERICAL_FACTORIZATION 22
22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
27 #define JOB_RELEASE_OF_LU_MEMORY 0
28 #define JOB_RELEASE_OF_ALL_MEMORY -1
29 
30 #define IPARM_SIZE 64
31 #define INT_TYPE MKL_INT
32 
33 static const char *Err_MSG_CPardiso(int errNo){
34   switch (errNo) {
35     case -1:
36       return "input inconsistent"; break;
37     case -2:
38       return "not enough memory"; break;
39     case -3:
40       return "reordering problem"; break;
41     case -4:
42       return "zero pivot, numerical factorization or iterative refinement problem"; break;
43     case -5:
44       return "unclassified (internal) error"; break;
45     case -6:
46       return "preordering failed (matrix types 11, 13 only)"; break;
47     case -7:
48       return "diagonal matrix problem"; break;
49     case -8:
50       return "32-bit integer overflow problem"; break;
51     case -9:
52       return "not enough memory for OOC"; break;
53     case -10:
54       return "problems with opening OOC temporary files"; break;
55     case -11:
56       return "read/write problems with the OOC data file"; break;
57     default :
58       return "unknown error";
59   }
60 }
61 
62 /*
63  *  Internal data structure.
64  *  For more information check mkl_cpardiso manual.
65  */
66 
67 typedef struct {
68 
69   /* Configuration vector */
70   INT_TYPE     iparm[IPARM_SIZE];
71 
72   /*
73    * Internal mkl_cpardiso memory location.
74    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
75    */
76   void         *pt[IPARM_SIZE];
77 
78   MPI_Comm     comm_mkl_cpardiso;
79 
80   /* Basic mkl_cpardiso info*/
81   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
82 
83   /* Matrix structure */
84   PetscScalar  *a;
85 
86   INT_TYPE     *ia, *ja;
87 
88   /* Number of non-zero elements */
89   INT_TYPE     nz;
90 
91   /* Row permutaton vector*/
92   INT_TYPE     *perm;
93 
94   /* Define is matrix preserve sparce structure. */
95   MatStructure matstruc;
96 
97   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
98 
99   /* True if mkl_cpardiso function have been used. */
100   PetscBool CleanUp;
101 } Mat_MKL_CPARDISO;
102 
103 /*
104  * Copy the elements of matrix A.
105  * Input:
106  *   - Mat A: MATSEQAIJ matrix
107  *   - int shift: matrix index.
108  *     - 0 for c representation
109  *     - 1 for fortran representation
110  *   - MatReuse reuse:
111  *     - MAT_INITIAL_MATRIX: Create a new aij representation
112  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
113  * Output:
114  *   - int *nnz: Number of nonzero-elements.
115  *   - int **r pointer to i index
116  *   - int **c pointer to j elements
117  *   - MATRIXTYPE **v: Non-zero elements
118  */
119 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
120 {
121   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
122 
123   PetscFunctionBegin;
124   *v=aa->a;
125   if (reuse == MAT_INITIAL_MATRIX) {
126     *r   = (INT_TYPE*)aa->i;
127     *c   = (INT_TYPE*)aa->j;
128     *nnz = aa->nz;
129   }
130   PetscFunctionReturn(0);
131 }
132 
133 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
134 {
135   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
136   PetscErrorCode    ierr;
137   PetscInt          rstart,nz,i,j,countA,countB;
138   PetscInt          *row,*col;
139   const PetscScalar *av, *bv;
140   PetscScalar       *val;
141   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
142   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
143   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
144   PetscInt          colA_start,jB,jcol;
145 
146   PetscFunctionBegin;
147   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
148   av=aa->a; bv=bb->a;
149 
150   garray = mat->garray;
151 
152   if (reuse == MAT_INITIAL_MATRIX) {
153     nz   = aa->nz + bb->nz;
154     *nnz = nz;
155     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
156     col  = row + m + 1;
157     val  = (PetscScalar*)(col + nz);
158     *r = row; *c = col; *v = val;
159     row[0] = 0;
160   } else {
161     row = *r; col = *c; val = *v;
162   }
163 
164   nz = 0;
165   for (i=0; i<m; i++) {
166     row[i] = nz;
167     countA     = ai[i+1] - ai[i];
168     countB     = bi[i+1] - bi[i];
169     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
170     bjj        = bj + bi[i];
171 
172     /* B part, smaller col index */
173     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
174     jB         = 0;
175     for (j=0; j<countB; j++) {
176       jcol = garray[bjj[j]];
177       if (jcol > colA_start) {
178         jB = j;
179         break;
180       }
181       col[nz]   = jcol;
182       val[nz++] = *bv++;
183       if (j==countB-1) jB = countB;
184     }
185 
186     /* A part */
187     for (j=0; j<countA; j++) {
188       col[nz]   = rstart + ajj[j];
189       val[nz++] = *av++;
190     }
191 
192     /* B part, larger col index */
193     for (j=jB; j<countB; j++) {
194       col[nz]   = garray[bjj[j]];
195       val[nz++] = *bv++;
196     }
197   }
198   row[m] = nz;
199 
200   PetscFunctionReturn(0);
201 }
202 
203 /*
204  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
205  */
206 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
207 {
208   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
209   PetscErrorCode   ierr;
210 
211   PetscFunctionBegin;
212   /* Terminate instance, deallocate memories */
213   if (mat_mkl_cpardiso->CleanUp) {
214     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
215 
216     cluster_sparse_solver (
217       mat_mkl_cpardiso->pt,
218       &mat_mkl_cpardiso->maxfct,
219       &mat_mkl_cpardiso->mnum,
220       &mat_mkl_cpardiso->mtype,
221       &mat_mkl_cpardiso->phase,
222       &mat_mkl_cpardiso->n,
223       NULL,
224       NULL,
225       NULL,
226       mat_mkl_cpardiso->perm,
227       &mat_mkl_cpardiso->nrhs,
228       mat_mkl_cpardiso->iparm,
229       &mat_mkl_cpardiso->msglvl,
230       NULL,
231       NULL,
232       &mat_mkl_cpardiso->comm_mkl_cpardiso,
233       (int*)&mat_mkl_cpardiso->err);
234   }
235 
236   if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) {
237     ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr);
238   }
239   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
240   ierr = PetscFree(A->data);CHKERRQ(ierr);
241 
242   /* clear composed functions */
243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
244   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 /*
249  * Computes Ax = b
250  */
251 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
252 {
253   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
254   PetscErrorCode    ierr;
255   PetscScalar       *xarray;
256   const PetscScalar *barray;
257 
258   PetscFunctionBegin;
259   mat_mkl_cpardiso->nrhs = 1;
260   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
261   ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
262 
263   /* solve phase */
264   /*-------------*/
265   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
266   cluster_sparse_solver (
267     mat_mkl_cpardiso->pt,
268     &mat_mkl_cpardiso->maxfct,
269     &mat_mkl_cpardiso->mnum,
270     &mat_mkl_cpardiso->mtype,
271     &mat_mkl_cpardiso->phase,
272     &mat_mkl_cpardiso->n,
273     mat_mkl_cpardiso->a,
274     mat_mkl_cpardiso->ia,
275     mat_mkl_cpardiso->ja,
276     mat_mkl_cpardiso->perm,
277     &mat_mkl_cpardiso->nrhs,
278     mat_mkl_cpardiso->iparm,
279     &mat_mkl_cpardiso->msglvl,
280     (void*)barray,
281     (void*)xarray,
282     &mat_mkl_cpardiso->comm_mkl_cpardiso,
283     (int*)&mat_mkl_cpardiso->err);
284 
285   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
286 
287   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
288   ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
289   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
290   PetscFunctionReturn(0);
291 }
292 
293 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
294 {
295   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
296   PetscErrorCode   ierr;
297 
298   PetscFunctionBegin;
299 #if defined(PETSC_USE_COMPLEX)
300   mat_mkl_cpardiso->iparm[12 - 1] = 1;
301 #else
302   mat_mkl_cpardiso->iparm[12 - 1] = 2;
303 #endif
304   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
305   mat_mkl_cpardiso->iparm[12 - 1] = 0;
306   PetscFunctionReturn(0);
307 }
308 
309 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
310 {
311   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
312   PetscErrorCode    ierr;
313   PetscScalar       *xarray;
314   const PetscScalar *barray;
315   PetscBool         flg;
316 
317   PetscFunctionBegin;
318   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr);
319   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
320   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr);
321   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
322 
323   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
324 
325   if(mat_mkl_cpardiso->nrhs > 0){
326     ierr = MatDenseGetArrayRead(B,&barray);
327     ierr = MatDenseGetArray(X,&xarray);
328 
329     /* solve phase */
330     /*-------------*/
331     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
332     cluster_sparse_solver (
333       mat_mkl_cpardiso->pt,
334       &mat_mkl_cpardiso->maxfct,
335       &mat_mkl_cpardiso->mnum,
336       &mat_mkl_cpardiso->mtype,
337       &mat_mkl_cpardiso->phase,
338       &mat_mkl_cpardiso->n,
339       mat_mkl_cpardiso->a,
340       mat_mkl_cpardiso->ia,
341       mat_mkl_cpardiso->ja,
342       mat_mkl_cpardiso->perm,
343       &mat_mkl_cpardiso->nrhs,
344       mat_mkl_cpardiso->iparm,
345       &mat_mkl_cpardiso->msglvl,
346       (void*)barray,
347       (void*)xarray,
348       &mat_mkl_cpardiso->comm_mkl_cpardiso,
349       (int*)&mat_mkl_cpardiso->err);
350     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
351     ierr = MatDenseRestoreArrayRead(B,&barray);
352     ierr = MatDenseRestoreArray(X,&xarray);
353 
354   }
355   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
356   PetscFunctionReturn(0);
357 
358 }
359 
360 /*
361  * LU Decomposition
362  */
363 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
364 {
365   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
366   PetscErrorCode   ierr;
367 
368   PetscFunctionBegin;
369   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
370   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
371 
372   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
373   cluster_sparse_solver (
374     mat_mkl_cpardiso->pt,
375     &mat_mkl_cpardiso->maxfct,
376     &mat_mkl_cpardiso->mnum,
377     &mat_mkl_cpardiso->mtype,
378     &mat_mkl_cpardiso->phase,
379     &mat_mkl_cpardiso->n,
380     mat_mkl_cpardiso->a,
381     mat_mkl_cpardiso->ia,
382     mat_mkl_cpardiso->ja,
383     mat_mkl_cpardiso->perm,
384     &mat_mkl_cpardiso->nrhs,
385     mat_mkl_cpardiso->iparm,
386     &mat_mkl_cpardiso->msglvl,
387     NULL,
388     NULL,
389     &mat_mkl_cpardiso->comm_mkl_cpardiso,
390     &mat_mkl_cpardiso->err);
391   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
392 
393   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
394   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
395   PetscFunctionReturn(0);
396 }
397 
398 /* Sets mkl_cpardiso options from the options database */
399 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
400 {
401   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
402   PetscErrorCode      ierr;
403   PetscInt            icntl;
404   PetscBool           flg;
405   int                 threads;
406 
407   PetscFunctionBegin;
408   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
409   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr);
410   if (flg) mkl_set_num_threads(threads);
411 
412   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr);
413   if (flg) mat_mkl_cpardiso->maxfct = icntl;
414 
415   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr);
416   if (flg) mat_mkl_cpardiso->mnum = icntl;
417 
418   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr);
419   if (flg) mat_mkl_cpardiso->msglvl = icntl;
420 
421   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr);
422   if(flg){
423     mat_mkl_cpardiso->mtype = icntl;
424 #if defined(PETSC_USE_REAL_SINGLE)
425     mat_mkl_cpardiso->iparm[27] = 1;
426 #else
427     mat_mkl_cpardiso->iparm[27] = 0;
428 #endif
429     mat_mkl_cpardiso->iparm[34] = 1;
430   }
431   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr);
432 
433   if(flg && icntl != 0){
434     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr);
435     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
436 
437     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr);
438     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
439 
440     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr);
441     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
442 
443     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr);
444     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
445 
446     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr);
447     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
448 
449     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr);
450     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
451 
452     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr);
453     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
454 
455     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr);
456     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
457 
458     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
459       &flg);CHKERRQ(ierr);
460     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
461 
462     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
463       &flg);CHKERRQ(ierr);
464     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
465 
466     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr);
467     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
468 
469     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr);
470     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
471 
472     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr);
473     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
474 
475     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr);
476     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
477 
478     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr);
479     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
480 
481     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr);
482     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
483 
484     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr);
485     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
486 
487     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr);
488     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
489   }
490 
491   ierr = PetscOptionsEnd();CHKERRQ(ierr);
492   PetscFunctionReturn(0);
493 }
494 
495 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
496 {
497   PetscErrorCode  ierr;
498   PetscMPIInt     size;
499 
500   PetscFunctionBegin;
501 
502   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
503   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
504 
505   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
506   mat_mkl_cpardiso->maxfct = 1;
507   mat_mkl_cpardiso->mnum = 1;
508   mat_mkl_cpardiso->n = A->rmap->N;
509   mat_mkl_cpardiso->msglvl = 0;
510   mat_mkl_cpardiso->nrhs = 1;
511   mat_mkl_cpardiso->err = 0;
512   mat_mkl_cpardiso->phase = -1;
513 #if defined(PETSC_USE_COMPLEX)
514   mat_mkl_cpardiso->mtype = 13;
515 #else
516   mat_mkl_cpardiso->mtype = 11;
517 #endif
518 
519 #if defined(PETSC_USE_REAL_SINGLE)
520   mat_mkl_cpardiso->iparm[27] = 1;
521 #else
522   mat_mkl_cpardiso->iparm[27] = 0;
523 #endif
524 
525   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
526 
527   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
528   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
529   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
530   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
531   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
532   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
533   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
534   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
535   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
536   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
537 
538   mat_mkl_cpardiso->iparm[39] = 0;
539   if (size > 1) {
540     mat_mkl_cpardiso->iparm[39] = 2;
541     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
542     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
543   }
544   mat_mkl_cpardiso->perm = 0;
545   PetscFunctionReturn(0);
546 }
547 
548 /*
549  * Symbolic decomposition. Mkl_Pardiso analysis phase.
550  */
551 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
552 {
553   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
554   PetscErrorCode  ierr;
555 
556   PetscFunctionBegin;
557   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
558 
559   /* Set MKL_CPARDISO options from the options database */
560   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
561   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
562 
563   mat_mkl_cpardiso->n = A->rmap->N;
564 
565   /* analysis phase */
566   /*----------------*/
567   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
568 
569   cluster_sparse_solver (
570     mat_mkl_cpardiso->pt,
571     &mat_mkl_cpardiso->maxfct,
572     &mat_mkl_cpardiso->mnum,
573     &mat_mkl_cpardiso->mtype,
574     &mat_mkl_cpardiso->phase,
575     &mat_mkl_cpardiso->n,
576     mat_mkl_cpardiso->a,
577     mat_mkl_cpardiso->ia,
578     mat_mkl_cpardiso->ja,
579     mat_mkl_cpardiso->perm,
580     &mat_mkl_cpardiso->nrhs,
581     mat_mkl_cpardiso->iparm,
582     &mat_mkl_cpardiso->msglvl,
583     NULL,
584     NULL,
585     &mat_mkl_cpardiso->comm_mkl_cpardiso,
586     (int*)&mat_mkl_cpardiso->err);
587 
588   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
589 
590   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
591   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
592   F->ops->solve           = MatSolve_MKL_CPARDISO;
593   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
594   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
595   PetscFunctionReturn(0);
596 }
597 
598 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
599 {
600   PetscErrorCode    ierr;
601   PetscBool         iascii;
602   PetscViewerFormat format;
603   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
604   PetscInt          i;
605 
606   PetscFunctionBegin;
607   /* check if matrix is mkl_cpardiso type */
608   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
609 
610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
611   if (iascii) {
612     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
613     if (format == PETSC_VIEWER_ASCII_INFO) {
614       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
615       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
616       for(i = 1; i <= 64; i++){
617         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
618       }
619       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
620       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
621       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
622       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
623       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
624       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
625     }
626   }
627   PetscFunctionReturn(0);
628 }
629 
630 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
631 {
632   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data;
633 
634   PetscFunctionBegin;
635   info->block_size        = 1.0;
636   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
637   info->nz_unneeded       = 0.0;
638   info->assemblies        = 0.0;
639   info->mallocs           = 0.0;
640   info->memory            = 0.0;
641   info->fill_ratio_given  = 0;
642   info->fill_ratio_needed = 0;
643   info->factor_mallocs    = 0;
644   PetscFunctionReturn(0);
645 }
646 
647 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
648 {
649   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data;
650 
651   PetscFunctionBegin;
652   if(icntl <= 64){
653     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
654   } else {
655     if(icntl == 65) mkl_set_num_threads((int)ival);
656     else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival;
657     else if(icntl == 67) mat_mkl_cpardiso->mnum = ival;
658     else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival;
659     else if(icntl == 69){
660       mat_mkl_cpardiso->mtype = ival;
661 #if defined(PETSC_USE_REAL_SINGLE)
662       mat_mkl_cpardiso->iparm[27] = 1;
663 #else
664       mat_mkl_cpardiso->iparm[27] = 0;
665 #endif
666       mat_mkl_cpardiso->iparm[34] = 1;
667     }
668   }
669   PetscFunctionReturn(0);
670 }
671 
672 /*@
673   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
674 
675    Logically Collective on Mat
676 
677    Input Parameters:
678 +  F - the factored matrix obtained by calling MatGetFactor()
679 .  icntl - index of Mkl_Pardiso parameter
680 -  ival - value of Mkl_Pardiso parameter
681 
682   Options Database:
683 .   -mat_mkl_cpardiso_<icntl> <ival>
684 
685    Level: Intermediate
686 
687    Notes: This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options
688           database approach when working with TS, SNES, or KSP.
689 
690    References:
691 .      Mkl_Pardiso Users' Guide
692 
693 .seealso: MatGetFactor()
694 @*/
695 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
696 {
697   PetscErrorCode ierr;
698 
699   PetscFunctionBegin;
700   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type)
705 {
706   PetscFunctionBegin;
707   *type = MATSOLVERMKL_CPARDISO;
708   PetscFunctionReturn(0);
709 }
710 
711 /* MatGetFactor for MPI AIJ matrices */
712 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
713 {
714   Mat              B;
715   PetscErrorCode   ierr;
716   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
717   PetscBool        isSeqAIJ;
718 
719   PetscFunctionBegin;
720   /* Create the factorization matrix */
721 
722   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
723   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
724   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
725   ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr);
726   ierr = MatSetUp(B);CHKERRQ(ierr);
727 
728   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
729 
730   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
731   else          mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
732 
733   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
734   B->ops->destroy = MatDestroy_MKL_CPARDISO;
735 
736   B->ops->view    = MatView_MKL_CPARDISO;
737   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
738 
739   B->factortype   = ftype;
740   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
741 
742   B->data = mat_mkl_cpardiso;
743 
744   /* set solvertype */
745   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
746   ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr);
747 
748   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);CHKERRQ(ierr);
749   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
750   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
751 
752   *F = B;
753   PetscFunctionReturn(0);
754 }
755 
756 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void)
757 {
758   PetscErrorCode ierr;
759 
760   PetscFunctionBegin;
761   ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
762   ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765