xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision a7fac7c2b47dbcd8ece0cc2dfdfe0e63be1bb7b5)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>                       /*I "petscmat.h" I*/
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #include <mkl.h>
12 #include <mkl_cluster_sparse_solver.h>
13 
14 /*
15  *  Possible mkl_cpardiso phases that controls the execution of the solver.
16  *  For more information check mkl_cpardiso manual.
17  */
18 #define JOB_ANALYSIS 11
19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
21 #define JOB_NUMERICAL_FACTORIZATION 22
22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
27 #define JOB_RELEASE_OF_LU_MEMORY 0
28 #define JOB_RELEASE_OF_ALL_MEMORY -1
29 
30 #define IPARM_SIZE 64
31 #define INT_TYPE MKL_INT
32 
33 static const char *Err_MSG_CPardiso(int errNo){
34   switch (errNo) {
35     case -1:
36       return "input inconsistent"; break;
37     case -2:
38       return "not enough memory"; break;
39     case -3:
40       return "reordering problem"; break;
41     case -4:
42       return "zero pivot, numerical factorization or iterative refinement problem"; break;
43     case -5:
44       return "unclassified (internal) error"; break;
45     case -6:
46       return "preordering failed (matrix types 11, 13 only)"; break;
47     case -7:
48       return "diagonal matrix problem"; break;
49     case -8:
50       return "32-bit integer overflow problem"; break;
51     case -9:
52       return "not enough memory for OOC"; break;
53     case -10:
54       return "problems with opening OOC temporary files"; break;
55     case -11:
56       return "read/write problems with the OOC data file"; break;
57     default :
58       return "unknown error";
59   }
60 }
61 
62 /*
63  *  Internal data structure.
64  *  For more information check mkl_cpardiso manual.
65  */
66 
67 typedef struct {
68 
69   /* Configuration vector */
70   INT_TYPE     iparm[IPARM_SIZE];
71 
72   /*
73    * Internal mkl_cpardiso memory location.
74    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
75    */
76   void         *pt[IPARM_SIZE];
77 
78   MPI_Comm     comm_mkl_cpardiso;
79 
80   /* Basic mkl_cpardiso info*/
81   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
82 
83   /* Matrix structure */
84   PetscScalar  *a;
85 
86   INT_TYPE     *ia, *ja;
87 
88   /* Number of non-zero elements */
89   INT_TYPE     nz;
90 
91   /* Row permutaton vector*/
92   INT_TYPE     *perm;
93 
94   /* Define is matrix preserve sparce structure. */
95   MatStructure matstruc;
96 
97   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
98 
99   /* True if mkl_cpardiso function have been used. */
100   PetscBool CleanUp;
101 } Mat_MKL_CPARDISO;
102 
103 /*
104  * Copy the elements of matrix A.
105  * Input:
106  *   - Mat A: MATSEQAIJ matrix
107  *   - int shift: matrix index.
108  *     - 0 for c representation
109  *     - 1 for fortran representation
110  *   - MatReuse reuse:
111  *     - MAT_INITIAL_MATRIX: Create a new aij representation
112  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
113  * Output:
114  *   - int *nnz: Number of nonzero-elements.
115  *   - int **r pointer to i index
116  *   - int **c pointer to j elements
117  *   - MATRIXTYPE **v: Non-zero elements
118  */
119 #undef __FUNCT__
120 #define __FUNCT__ "MatCopy_seqaij_seqaij_MKL_CPARDISO"
121 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
122 {
123   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
124 
125   PetscFunctionBegin;
126   *v=aa->a;
127   if (reuse == MAT_INITIAL_MATRIX) {
128     *r   = (INT_TYPE*)aa->i;
129     *c   = (INT_TYPE*)aa->j;
130     *nnz = aa->nz;
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO"
137 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
138 {
139   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
140   PetscErrorCode    ierr;
141   PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
142   PetscInt          *row,*col;
143   const PetscScalar *av, *bv,*v1,*v2;
144   PetscScalar       *val;
145   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
146   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
147   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
148   PetscInt          nn, colA_start,jB,jcol;
149 
150   PetscFunctionBegin;
151   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
152   av=aa->a; bv=bb->a;
153 
154   garray = mat->garray;
155 
156   if (reuse == MAT_INITIAL_MATRIX) {
157     nz   = aa->nz + bb->nz;
158     *nnz = nz;
159     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
160     col  = row + m + 1;
161     val  = (PetscScalar*)(col + nz);
162     *r = row; *c = col; *v = val;
163     row[0] = 0;
164   } else {
165     row = *r; col = *c; val = *v;
166   }
167 
168   nz = 0;
169   for (i=0; i<m; i++) {
170     row[i] = nz;
171     countA     = ai[i+1] - ai[i];
172     countB     = bi[i+1] - bi[i];
173     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
174     bjj        = bj + bi[i];
175 
176     /* B part, smaller col index */
177     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
178     jB         = 0;
179     for (j=0; j<countB; j++) {
180       jcol = garray[bjj[j]];
181       if (jcol > colA_start) {
182         jB = j;
183         break;
184       }
185       col[nz]   = jcol;
186       val[nz++] = *bv++;
187       if (j==countB-1) jB = countB;
188     }
189 
190     /* A part */
191     for (j=0; j<countA; j++) {
192       col[nz]   = rstart + ajj[j];
193       val[nz++] = *av++;
194     }
195 
196     /* B part, larger col index */
197     for (j=jB; j<countB; j++) {
198       col[nz]   = garray[bjj[j]];
199       val[nz++] = *bv++;
200     }
201   }
202   row[m] = nz;
203 
204   PetscFunctionReturn(0);
205 }
206 
207 /*
208  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
209  */
210 #undef __FUNCT__
211 #define __FUNCT__ "MatDestroy_MKL_CPARDISO"
212 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
213 {
214   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
215   PetscErrorCode   ierr;
216 
217   PetscFunctionBegin;
218   /* Terminate instance, deallocate memories */
219   if (mat_mkl_cpardiso->CleanUp) {
220     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
221 
222     cluster_sparse_solver (
223       mat_mkl_cpardiso->pt,
224       &mat_mkl_cpardiso->maxfct,
225       &mat_mkl_cpardiso->mnum,
226       &mat_mkl_cpardiso->mtype,
227       &mat_mkl_cpardiso->phase,
228       &mat_mkl_cpardiso->n,
229       NULL,
230       NULL,
231       NULL,
232       mat_mkl_cpardiso->perm,
233       &mat_mkl_cpardiso->nrhs,
234       mat_mkl_cpardiso->iparm,
235       &mat_mkl_cpardiso->msglvl,
236       NULL,
237       NULL,
238       &mat_mkl_cpardiso->comm_mkl_cpardiso,
239       &mat_mkl_cpardiso->err);
240   }
241   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
242   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
243 
244   /* clear composed functions */
245   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251  * Computes Ax = b
252  */
253 #undef __FUNCT__
254 #define __FUNCT__ "MatSolve_MKL_CPARDISO"
255 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
256 {
257   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr;
258   PetscErrorCode    ierr;
259   PetscScalar       *xarray;
260   const PetscScalar *barray;
261 
262   PetscFunctionBegin;
263   mat_mkl_cpardiso->nrhs = 1;
264   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
265   ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
266 
267   /* solve phase */
268   /*-------------*/
269   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
270   cluster_sparse_solver (
271     mat_mkl_cpardiso->pt,
272     &mat_mkl_cpardiso->maxfct,
273     &mat_mkl_cpardiso->mnum,
274     &mat_mkl_cpardiso->mtype,
275     &mat_mkl_cpardiso->phase,
276     &mat_mkl_cpardiso->n,
277     mat_mkl_cpardiso->a,
278     mat_mkl_cpardiso->ia,
279     mat_mkl_cpardiso->ja,
280     mat_mkl_cpardiso->perm,
281     &mat_mkl_cpardiso->nrhs,
282     mat_mkl_cpardiso->iparm,
283     &mat_mkl_cpardiso->msglvl,
284     (void*)barray,
285     (void*)xarray,
286     &mat_mkl_cpardiso->comm_mkl_cpardiso,
287     &mat_mkl_cpardiso->err);
288 
289   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
290 
291   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
292   ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
293   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
294   PetscFunctionReturn(0);
295 }
296 
297 #undef __FUNCT__
298 #define __FUNCT__ "MatSolveTranspose_MKL_CPARDISO"
299 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
300 {
301   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
302   PetscErrorCode   ierr;
303 
304   PetscFunctionBegin;
305 #if defined(PETSC_USE_COMPLEX)
306   mat_mkl_cpardiso->iparm[12 - 1] = 1;
307 #else
308   mat_mkl_cpardiso->iparm[12 - 1] = 2;
309 #endif
310   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
311   mat_mkl_cpardiso->iparm[12 - 1] = 0;
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatMatSolve_MKL_CPARDISO"
317 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
318 {
319   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr;
320   PetscErrorCode    ierr;
321   PetscScalar       *barray, *xarray;
322   PetscBool         flg;
323 
324   PetscFunctionBegin;
325   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr);
326   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
327   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr);
328   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
329 
330   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
331 
332   if(mat_mkl_cpardiso->nrhs > 0){
333     ierr = MatDenseGetArray(B,&barray);
334     ierr = MatDenseGetArray(X,&xarray);
335 
336     /* solve phase */
337     /*-------------*/
338     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
339     cluster_sparse_solver (
340       mat_mkl_cpardiso->pt,
341       &mat_mkl_cpardiso->maxfct,
342       &mat_mkl_cpardiso->mnum,
343       &mat_mkl_cpardiso->mtype,
344       &mat_mkl_cpardiso->phase,
345       &mat_mkl_cpardiso->n,
346       mat_mkl_cpardiso->a,
347       mat_mkl_cpardiso->ia,
348       mat_mkl_cpardiso->ja,
349       mat_mkl_cpardiso->perm,
350       &mat_mkl_cpardiso->nrhs,
351       mat_mkl_cpardiso->iparm,
352       &mat_mkl_cpardiso->msglvl,
353       (void*)barray,
354       (void*)xarray,
355       &mat_mkl_cpardiso->comm_mkl_cpardiso,
356       &mat_mkl_cpardiso->err);
357     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
358   }
359   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
360   PetscFunctionReturn(0);
361 
362 }
363 
364 /*
365  * LU Decomposition
366  */
367 #undef __FUNCT__
368 #define __FUNCT__ "MatFactorNumeric_MKL_CPARDISO"
369 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
370 {
371   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->spptr;
372   PetscErrorCode   ierr;
373 
374   /* numerical factorization phase */
375   /*-------------------------------*/
376 
377   PetscFunctionBegin;
378   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
379   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
380 
381   /* numerical factorization phase */
382   /*-------------------------------*/
383   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
384   cluster_sparse_solver (
385     mat_mkl_cpardiso->pt,
386     &mat_mkl_cpardiso->maxfct,
387     &mat_mkl_cpardiso->mnum,
388     &mat_mkl_cpardiso->mtype,
389     &mat_mkl_cpardiso->phase,
390     &mat_mkl_cpardiso->n,
391     mat_mkl_cpardiso->a,
392     mat_mkl_cpardiso->ia,
393     mat_mkl_cpardiso->ja,
394     mat_mkl_cpardiso->perm,
395     &mat_mkl_cpardiso->nrhs,
396     mat_mkl_cpardiso->iparm,
397     &mat_mkl_cpardiso->msglvl,
398     NULL,
399     NULL,
400     &mat_mkl_cpardiso->comm_mkl_cpardiso,
401     &mat_mkl_cpardiso->err);
402   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
403 
404   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
405   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
406   PetscFunctionReturn(0);
407 }
408 
409 /* Sets mkl_cpardiso options from the options database */
410 #undef __FUNCT__
411 #define __FUNCT__ "PetscSetMKL_CPARDISOFromOptions"
412 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
413 {
414   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr;
415   PetscErrorCode      ierr;
416   PetscInt            icntl;
417   PetscBool           flg;
418   int                 pt[IPARM_SIZE], threads;
419 
420   PetscFunctionBegin;
421   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
422   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65",
423     "Number of threads to use",
424     "None",
425     threads,
426     &threads,
427     &flg);CHKERRQ(ierr);
428   if (flg) mkl_set_num_threads(threads);
429 
430   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66",
431     "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time",
432     "None",
433      mat_mkl_cpardiso->maxfct,
434     &icntl,
435     &flg);CHKERRQ(ierr);
436   if (flg) mat_mkl_cpardiso->maxfct = icntl;
437 
438   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67",
439     "Indicates the actual matrix for the solution phase",
440     "None",
441     mat_mkl_cpardiso->mnum,
442     &icntl,
443     &flg);CHKERRQ(ierr);
444   if (flg) mat_mkl_cpardiso->mnum = icntl;
445 
446   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68",
447     "Message level information",
448     "None",
449     mat_mkl_cpardiso->msglvl,
450     &icntl,
451     &flg);CHKERRQ(ierr);
452   if (flg) mat_mkl_cpardiso->msglvl = icntl;
453 
454   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69",
455     "Defines the matrix type",
456     "None",
457     mat_mkl_cpardiso->mtype,
458     &icntl,
459     &flg);CHKERRQ(ierr);
460   if(flg){
461     mat_mkl_cpardiso->mtype = icntl;
462 #if defined(PETSC_USE_REAL_SINGLE)
463     mat_mkl_cpardiso->iparm[27] = 1;
464 #else
465     mat_mkl_cpardiso->iparm[27] = 0;
466 #endif
467     mat_mkl_cpardiso->iparm[34] = 1;
468   }
469   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1",
470     "Use default values",
471     "None",
472     mat_mkl_cpardiso->iparm[0],
473     &icntl,
474     &flg);CHKERRQ(ierr);
475 
476   if(flg && icntl != 0){
477     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2",
478       "Fill-in reducing ordering for the input matrix",
479       "None",
480       mat_mkl_cpardiso->iparm[1],
481       &icntl,
482       &flg);CHKERRQ(ierr);
483     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
484 
485     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4",
486       "Preconditioned CGS/CG",
487       "None",
488       mat_mkl_cpardiso->iparm[3],
489       &icntl,
490       &flg);CHKERRQ(ierr);
491     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
492 
493     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5",
494       "User permutation",
495       "None",
496       mat_mkl_cpardiso->iparm[4],
497       &icntl,
498       &flg);CHKERRQ(ierr);
499     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
500 
501     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6",
502       "Write solution on x",
503       "None",
504       mat_mkl_cpardiso->iparm[5],
505       &icntl,
506       &flg);CHKERRQ(ierr);
507     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
508 
509     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8",
510       "Iterative refinement step",
511       "None",
512       mat_mkl_cpardiso->iparm[7],
513       &icntl,
514       &flg);CHKERRQ(ierr);
515     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
516 
517     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10",
518       "Pivoting perturbation",
519       "None",
520       mat_mkl_cpardiso->iparm[9],
521       &icntl,
522       &flg);CHKERRQ(ierr);
523     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
524 
525     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11",
526       "Scaling vectors",
527       "None",
528       mat_mkl_cpardiso->iparm[10],
529       &icntl,
530       &flg);CHKERRQ(ierr);
531     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
532 
533     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12",
534       "Solve with transposed or conjugate transposed matrix A",
535       "None",
536       mat_mkl_cpardiso->iparm[11],
537       &icntl,
538       &flg);CHKERRQ(ierr);
539     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
540 
541     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13",
542       "Improved accuracy using (non-) symmetric weighted matching",
543       "None",
544       mat_mkl_cpardiso->iparm[12],
545       &icntl,
546       &flg);CHKERRQ(ierr);
547     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
548 
549     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18",
550       "Numbers of non-zero elements",
551       "None",
552       mat_mkl_cpardiso->iparm[17],
553       &icntl,
554       &flg);CHKERRQ(ierr);
555     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
556 
557     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19",
558       "Report number of floating point operations",
559       "None",
560       mat_mkl_cpardiso->iparm[18],
561       &icntl,
562       &flg);CHKERRQ(ierr);
563     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
564 
565     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21",
566       "Pivoting for symmetric indefinite matrices",
567       "None",
568       mat_mkl_cpardiso->iparm[20],
569       &icntl,
570       &flg);CHKERRQ(ierr);
571     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
572 
573     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24",
574       "Parallel factorization control",
575       "None",
576       mat_mkl_cpardiso->iparm[23],
577       &icntl,
578       &flg);CHKERRQ(ierr);
579     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
580 
581     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25",
582       "Parallel forward/backward solve control",
583       "None",
584       mat_mkl_cpardiso->iparm[24],
585       &icntl,
586       &flg);CHKERRQ(ierr);
587     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
588 
589     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27",
590       "Matrix checker",
591       "None",
592       mat_mkl_cpardiso->iparm[26],
593       &icntl,
594       &flg);CHKERRQ(ierr);
595     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
596 
597     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31",
598       "Partial solve and computing selected components of the solution vectors",
599       "None",
600       mat_mkl_cpardiso->iparm[30],
601       &icntl,
602       &flg);CHKERRQ(ierr);
603     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
604 
605     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34",
606       "Optimal number of threads for conditional numerical reproducibility (CNR) mode",
607       "None",
608       mat_mkl_cpardiso->iparm[33],
609       &icntl,
610       &flg);CHKERRQ(ierr);
611     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
612 
613     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60",
614       "Intel MKL_CPARDISO mode",
615       "None",
616       mat_mkl_cpardiso->iparm[59],
617       &icntl,
618       &flg);CHKERRQ(ierr);
619     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
620   }
621 
622   PetscOptionsEnd();
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "PetscInitialize_MKL_CPARDISO"
628 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
629 {
630   PetscErrorCode  ierr;
631   PetscInt        i;
632   PetscMPIInt     size;
633 
634   PetscFunctionBegin;
635 
636   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
637   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
638 
639   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
640   mat_mkl_cpardiso->maxfct = 1;
641   mat_mkl_cpardiso->mnum = 1;
642   mat_mkl_cpardiso->n = A->rmap->N;
643   mat_mkl_cpardiso->msglvl = 0;
644   mat_mkl_cpardiso->nrhs = 1;
645   mat_mkl_cpardiso->err = 0;
646   mat_mkl_cpardiso->phase = -1;
647 #if defined(PETSC_USE_COMPLEX)
648   mat_mkl_cpardiso->mtype = 13;
649 #else
650   mat_mkl_cpardiso->mtype = 11;
651 #endif
652 
653 #if defined(PETSC_USE_REAL_SINGLE)
654   mat_mkl_cpardiso->iparm[27] = 1;
655 #else
656   mat_mkl_cpardiso->iparm[27] = 0;
657 #endif
658 
659   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
660 
661   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
662   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
663   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
664   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
665   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
666   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
667   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
668   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
669   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
670   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
671 
672   mat_mkl_cpardiso->iparm[39] = 0;
673   if (size > 1) {
674     mat_mkl_cpardiso->iparm[39] = 2;
675     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
676     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
677   }
678   mat_mkl_cpardiso->perm = 0;
679   PetscFunctionReturn(0);
680 }
681 
682 /*
683  * Symbolic decomposition. Mkl_Pardiso analysis phase.
684  */
685 #undef __FUNCT__
686 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_CPARDISO"
687 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
688 {
689   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr;
690   PetscErrorCode  ierr;
691 
692   PetscFunctionBegin;
693   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
694 
695   /* Set MKL_CPARDISO options from the options database */
696   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
697 
698   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
699 
700   mat_mkl_cpardiso->n = A->rmap->N;
701 
702   /* analysis phase */
703   /*----------------*/
704   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
705 
706   cluster_sparse_solver (
707     mat_mkl_cpardiso->pt,
708     &mat_mkl_cpardiso->maxfct,
709     &mat_mkl_cpardiso->mnum,
710     &mat_mkl_cpardiso->mtype,
711     &mat_mkl_cpardiso->phase,
712     &mat_mkl_cpardiso->n,
713     mat_mkl_cpardiso->a,
714     mat_mkl_cpardiso->ia,
715     mat_mkl_cpardiso->ja,
716     mat_mkl_cpardiso->perm,
717     &mat_mkl_cpardiso->nrhs,
718     mat_mkl_cpardiso->iparm,
719     &mat_mkl_cpardiso->msglvl,
720     NULL,
721     NULL,
722     &mat_mkl_cpardiso->comm_mkl_cpardiso,
723     &mat_mkl_cpardiso->err);
724 
725   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
726 
727   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
728   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
729   F->ops->solve           = MatSolve_MKL_CPARDISO;
730   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
731   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNCT__
736 #define __FUNCT__ "MatView_MKL_CPARDISO"
737 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
738 {
739   PetscErrorCode    ierr;
740   PetscBool         iascii;
741   PetscViewerFormat format;
742   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
743   PetscInt          i;
744 
745   PetscFunctionBegin;
746   /* check if matrix is mkl_cpardiso type */
747   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
748 
749   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
750   if (iascii) {
751     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
752     if (format == PETSC_VIEWER_ASCII_INFO) {
753       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
754       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
755       for(i = 1; i <= 64; i++){
756         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
757       }
758       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
759       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
760       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
761       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
762       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
763       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
764     }
765   }
766   PetscFunctionReturn(0);
767 }
768 
769 #undef __FUNCT__
770 #define __FUNCT__ "MatGetInfo_MKL_CPARDISO"
771 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
772 {
773   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->spptr;
774 
775   PetscFunctionBegin;
776   info->block_size        = 1.0;
777   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
778   info->nz_unneeded       = 0.0;
779   info->assemblies        = 0.0;
780   info->mallocs           = 0.0;
781   info->memory            = 0.0;
782   info->fill_ratio_given  = 0;
783   info->fill_ratio_needed = 0;
784   info->factor_mallocs    = 0;
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatMkl_CPardisoSetCntl_MKL_CPARDISO"
790 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
791 {
792   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->spptr;
793 
794   PetscFunctionBegin;
795   if(icntl <= 64){
796     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
797   } else {
798     if(icntl == 65)
799       mkl_set_num_threads((int)ival);
800     else if(icntl == 66)
801       mat_mkl_cpardiso->maxfct = ival;
802     else if(icntl == 67)
803       mat_mkl_cpardiso->mnum = ival;
804     else if(icntl == 68)
805       mat_mkl_cpardiso->msglvl = ival;
806     else if(icntl == 69){
807       int pt[IPARM_SIZE];
808       mat_mkl_cpardiso->mtype = ival;
809 #if defined(PETSC_USE_REAL_SINGLE)
810       mat_mkl_cpardiso->iparm[27] = 1;
811 #else
812       mat_mkl_cpardiso->iparm[27] = 0;
813 #endif
814       mat_mkl_cpardiso->iparm[34] = 1;
815     }
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatMkl_CPardisoSetCntl"
822 /*@
823   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
824 
825    Logically Collective on Mat
826 
827    Input Parameters:
828 +  F - the factored matrix obtained by calling MatGetFactor()
829 .  icntl - index of Mkl_Pardiso parameter
830 -  ival - value of Mkl_Pardiso parameter
831 
832   Options Database:
833 .   -mat_mkl_cpardiso_<icntl> <ival>
834 
835    Level: beginner
836 
837    References: Mkl_Pardiso Users' Guide
838 
839 .seealso: MatGetFactor()
840 @*/
841 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
842 {
843   PetscErrorCode ierr;
844 
845   PetscFunctionBegin;
846   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
847   PetscFunctionReturn(0);
848 }
849 
850 #undef __FUNCT__
851 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_cpardiso"
852 static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type)
853 {
854   PetscFunctionBegin;
855   *type = MATSOLVERMKL_CPARDISO;
856   PetscFunctionReturn(0);
857 }
858 
859 /* MatGetFactor for MPI AIJ matrices */
860 #undef __FUNCT__
861 #define __FUNCT__ "MatGetFactor_mpiaij_mkl_cpardiso"
862 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
863 {
864   Mat              B;
865   PetscErrorCode   ierr;
866   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
867   PetscBool        isSeqAIJ;
868 
869   PetscFunctionBegin;
870   /* Create the factorization matrix */
871 
872   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
873   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
874   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
875   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
876 
877   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
878 
879   if (isSeqAIJ) {
880     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
881   mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
882   } else {
883     mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
884     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
885   }
886 
887   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
888   B->ops->destroy = MatDestroy_MKL_CPARDISO;
889 
890   B->ops->view    = MatView_MKL_CPARDISO;
891   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
892 
893   B->factortype   = ftype;
894   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
895 
896   B->spptr = mat_mkl_cpardiso;
897 
898   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);CHKERRQ(ierr);
899   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
900   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
901 
902   *F = B;
903   PetscFunctionReturn(0);
904 }
905 
906 #undef __FUNCT__
907 #define __FUNCT__ "MatSolverPackageRegister_MKL_CPardiso"
908 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void)
909 {
910   PetscErrorCode ierr;
911 
912   PetscFunctionBegin;
913   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
914   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917