xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision 7cf71cfdedeb9639d8ef2da3c41fb734efb627ee)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>                       /*I "petscmat.h" I*/
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #include <mkl.h>
12 #include <mkl_cluster_sparse_solver.h>
13 
14 /*
15  *  Possible mkl_cpardiso phases that controls the execution of the solver.
16  *  For more information check mkl_cpardiso manual.
17  */
18 #define JOB_ANALYSIS 11
19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
21 #define JOB_NUMERICAL_FACTORIZATION 22
22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
27 #define JOB_RELEASE_OF_LU_MEMORY 0
28 #define JOB_RELEASE_OF_ALL_MEMORY -1
29 
30 #define IPARM_SIZE 64
31 #define INT_TYPE MKL_INT
32 
33 static const char *Err_MSG_CPardiso(int errNo){
34   switch (errNo) {
35     case -1:
36       return "input inconsistent"; break;
37     case -2:
38       return "not enough memory"; break;
39     case -3:
40       return "reordering problem"; break;
41     case -4:
42       return "zero pivot, numerical factorization or iterative refinement problem"; break;
43     case -5:
44       return "unclassified (internal) error"; break;
45     case -6:
46       return "preordering failed (matrix types 11, 13 only)"; break;
47     case -7:
48       return "diagonal matrix problem"; break;
49     case -8:
50       return "32-bit integer overflow problem"; break;
51     case -9:
52       return "not enough memory for OOC"; break;
53     case -10:
54       return "problems with opening OOC temporary files"; break;
55     case -11:
56       return "read/write problems with the OOC data file"; break;
57     default :
58       return "unknown error";
59   }
60 }
61 
62 /*
63  *  Internal data structure.
64  *  For more information check mkl_cpardiso manual.
65  */
66 
67 typedef struct {
68 
69   /* Configuration vector */
70   INT_TYPE     iparm[IPARM_SIZE];
71 
72   /*
73    * Internal mkl_cpardiso memory location.
74    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
75    */
76   void         *pt[IPARM_SIZE];
77 
78   MPI_Comm     comm_mkl_cpardiso;
79 
80   /* Basic mkl_cpardiso info*/
81   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
82 
83   /* Matrix structure */
84   PetscScalar  *a;
85 
86   INT_TYPE     *ia, *ja;
87 
88   /* Number of non-zero elements */
89   INT_TYPE     nz;
90 
91   /* Row permutaton vector*/
92   INT_TYPE     *perm;
93 
94   /* Define is matrix preserve sparce structure. */
95   MatStructure matstruc;
96 
97   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
98 
99   /* True if mkl_cpardiso function have been used. */
100   PetscBool CleanUp;
101 } Mat_MKL_CPARDISO;
102 
103 /*
104  * Copy the elements of matrix A.
105  * Input:
106  *   - Mat A: MATSEQAIJ matrix
107  *   - int shift: matrix index.
108  *     - 0 for c representation
109  *     - 1 for fortran representation
110  *   - MatReuse reuse:
111  *     - MAT_INITIAL_MATRIX: Create a new aij representation
112  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
113  * Output:
114  *   - int *nnz: Number of nonzero-elements.
115  *   - int **r pointer to i index
116  *   - int **c pointer to j elements
117  *   - MATRIXTYPE **v: Non-zero elements
118  */
119 #undef __FUNCT__
120 #define __FUNCT__ "MatCopy_seqaij_seqaij_MKL_CPARDISO"
121 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
122 {
123   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
124 
125   PetscFunctionBegin;
126   *v=aa->a;
127   if (reuse == MAT_INITIAL_MATRIX) {
128     *r   = (INT_TYPE*)aa->i;
129     *c   = (INT_TYPE*)aa->j;
130     *nnz = aa->nz;
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO"
137 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
138 {
139   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
140   PetscErrorCode    ierr;
141   PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
142   PetscInt          *row,*col;
143   const PetscScalar *av, *bv,*v1,*v2;
144   PetscScalar       *val;
145   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
146   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
147   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
148   PetscInt          nn, colA_start,jB,jcol;
149 
150   PetscFunctionBegin;
151   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
152   av=aa->a; bv=bb->a;
153 
154   garray = mat->garray;
155 
156   if (reuse == MAT_INITIAL_MATRIX) {
157     nz   = aa->nz + bb->nz;
158     *nnz = nz;
159     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
160     col  = row + m + 1;
161     val  = (PetscScalar*)(col + nz);
162     *r = row; *c = col; *v = val;
163     row[0] = 0;
164   } else {
165     row = *r; col = *c; val = *v;
166   }
167 
168   nz = 0;
169   for (i=0; i<m; i++) {
170     row[i] = nz;
171     countA     = ai[i+1] - ai[i];
172     countB     = bi[i+1] - bi[i];
173     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
174     bjj        = bj + bi[i];
175 
176     /* B part, smaller col index */
177     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
178     jB         = 0;
179     for (j=0; j<countB; j++) {
180       jcol = garray[bjj[j]];
181       if (jcol > colA_start) {
182         jB = j;
183         break;
184       }
185       col[nz]   = jcol;
186       val[nz++] = *bv++;
187       if (j==countB-1) jB = countB;
188     }
189 
190     /* A part */
191     for (j=0; j<countA; j++) {
192       col[nz]   = rstart + ajj[j];
193       val[nz++] = *av++;
194     }
195 
196     /* B part, larger col index */
197     for (j=jB; j<countB; j++) {
198       col[nz]   = garray[bjj[j]];
199       val[nz++] = *bv++;
200     }
201   }
202   row[m] = nz;
203 
204   PetscFunctionReturn(0);
205 }
206 
207 /*
208  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
209  */
210 #undef __FUNCT__
211 #define __FUNCT__ "MatDestroy_MKL_CPARDISO"
212 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
213 {
214   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
215   PetscErrorCode   ierr;
216    PetscBool        flg;
217 
218   PetscFunctionBegin;
219   /* Terminate instance, deallocate memories */
220   if (mat_mkl_cpardiso->CleanUp) {
221     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
222 
223     cluster_sparse_solver (
224       mat_mkl_cpardiso->pt,
225       &mat_mkl_cpardiso->maxfct,
226       &mat_mkl_cpardiso->mnum,
227       &mat_mkl_cpardiso->mtype,
228       &mat_mkl_cpardiso->phase,
229       &mat_mkl_cpardiso->n,
230       NULL,
231       NULL,
232       NULL,
233       mat_mkl_cpardiso->perm,
234       &mat_mkl_cpardiso->nrhs,
235       mat_mkl_cpardiso->iparm,
236       &mat_mkl_cpardiso->msglvl,
237       NULL,
238       NULL,
239       &mat_mkl_cpardiso->comm_mkl_cpardiso,
240       &mat_mkl_cpardiso->err);
241   }
242 
243   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&flg);CHKERRQ(ierr);
244   if (flg) {
245     ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
246   } else {
247     ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr);
248     ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
249   }
250   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
251   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
252 
253   /* clear composed functions */
254   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
255   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
256   PetscFunctionReturn(0);
257 }
258 
259 /*
260  * Computes Ax = b
261  */
262 #undef __FUNCT__
263 #define __FUNCT__ "MatSolve_MKL_CPARDISO"
264 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
265 {
266   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr;
267   PetscErrorCode    ierr;
268   PetscScalar       *xarray;
269   const PetscScalar *barray;
270 
271   PetscFunctionBegin;
272   mat_mkl_cpardiso->nrhs = 1;
273   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
274   ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
275 
276   /* solve phase */
277   /*-------------*/
278   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
279   cluster_sparse_solver (
280     mat_mkl_cpardiso->pt,
281     &mat_mkl_cpardiso->maxfct,
282     &mat_mkl_cpardiso->mnum,
283     &mat_mkl_cpardiso->mtype,
284     &mat_mkl_cpardiso->phase,
285     &mat_mkl_cpardiso->n,
286     mat_mkl_cpardiso->a,
287     mat_mkl_cpardiso->ia,
288     mat_mkl_cpardiso->ja,
289     mat_mkl_cpardiso->perm,
290     &mat_mkl_cpardiso->nrhs,
291     mat_mkl_cpardiso->iparm,
292     &mat_mkl_cpardiso->msglvl,
293     (void*)barray,
294     (void*)xarray,
295     &mat_mkl_cpardiso->comm_mkl_cpardiso,
296     &mat_mkl_cpardiso->err);
297 
298   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
299 
300   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
301   ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
302   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatSolveTranspose_MKL_CPARDISO"
308 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
309 {
310   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
311   PetscErrorCode   ierr;
312 
313   PetscFunctionBegin;
314 #if defined(PETSC_USE_COMPLEX)
315   mat_mkl_cpardiso->iparm[12 - 1] = 1;
316 #else
317   mat_mkl_cpardiso->iparm[12 - 1] = 2;
318 #endif
319   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
320   mat_mkl_cpardiso->iparm[12 - 1] = 0;
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatMatSolve_MKL_CPARDISO"
326 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
327 {
328   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr;
329   PetscErrorCode    ierr;
330   PetscScalar       *barray, *xarray;
331   PetscBool         flg;
332 
333   PetscFunctionBegin;
334   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr);
335   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
336   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr);
337   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
338 
339   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
340 
341   if(mat_mkl_cpardiso->nrhs > 0){
342     ierr = MatDenseGetArray(B,&barray);
343     ierr = MatDenseGetArray(X,&xarray);
344 
345     /* solve phase */
346     /*-------------*/
347     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
348     cluster_sparse_solver (
349       mat_mkl_cpardiso->pt,
350       &mat_mkl_cpardiso->maxfct,
351       &mat_mkl_cpardiso->mnum,
352       &mat_mkl_cpardiso->mtype,
353       &mat_mkl_cpardiso->phase,
354       &mat_mkl_cpardiso->n,
355       mat_mkl_cpardiso->a,
356       mat_mkl_cpardiso->ia,
357       mat_mkl_cpardiso->ja,
358       mat_mkl_cpardiso->perm,
359       &mat_mkl_cpardiso->nrhs,
360       mat_mkl_cpardiso->iparm,
361       &mat_mkl_cpardiso->msglvl,
362       (void*)barray,
363       (void*)xarray,
364       &mat_mkl_cpardiso->comm_mkl_cpardiso,
365       &mat_mkl_cpardiso->err);
366     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
367   }
368   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
369   PetscFunctionReturn(0);
370 
371 }
372 
373 /*
374  * LU Decomposition
375  */
376 #undef __FUNCT__
377 #define __FUNCT__ "MatFactorNumeric_MKL_CPARDISO"
378 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
379 {
380   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->spptr;
381   PetscErrorCode   ierr;
382 
383   /* numerical factorization phase */
384   /*-------------------------------*/
385 
386   PetscFunctionBegin;
387   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
388   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
389 
390   /* numerical factorization phase */
391   /*-------------------------------*/
392   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
393   cluster_sparse_solver (
394     mat_mkl_cpardiso->pt,
395     &mat_mkl_cpardiso->maxfct,
396     &mat_mkl_cpardiso->mnum,
397     &mat_mkl_cpardiso->mtype,
398     &mat_mkl_cpardiso->phase,
399     &mat_mkl_cpardiso->n,
400     mat_mkl_cpardiso->a,
401     mat_mkl_cpardiso->ia,
402     mat_mkl_cpardiso->ja,
403     mat_mkl_cpardiso->perm,
404     &mat_mkl_cpardiso->nrhs,
405     mat_mkl_cpardiso->iparm,
406     &mat_mkl_cpardiso->msglvl,
407     NULL,
408     NULL,
409     &mat_mkl_cpardiso->comm_mkl_cpardiso,
410     &mat_mkl_cpardiso->err);
411   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
412 
413   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
414   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
415   PetscFunctionReturn(0);
416 }
417 
418 /* Sets mkl_cpardiso options from the options database */
419 #undef __FUNCT__
420 #define __FUNCT__ "PetscSetMKL_CPARDISOFromOptions"
421 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
422 {
423   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr;
424   PetscErrorCode      ierr;
425   PetscInt            icntl;
426   PetscBool           flg;
427   int                 pt[IPARM_SIZE], threads;
428 
429   PetscFunctionBegin;
430   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
431   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65",
432     "Number of threads to use",
433     "None",
434     threads,
435     &threads,
436     &flg);CHKERRQ(ierr);
437   if (flg) mkl_set_num_threads(threads);
438 
439   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66",
440     "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time",
441     "None",
442      mat_mkl_cpardiso->maxfct,
443     &icntl,
444     &flg);CHKERRQ(ierr);
445   if (flg) mat_mkl_cpardiso->maxfct = icntl;
446 
447   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67",
448     "Indicates the actual matrix for the solution phase",
449     "None",
450     mat_mkl_cpardiso->mnum,
451     &icntl,
452     &flg);CHKERRQ(ierr);
453   if (flg) mat_mkl_cpardiso->mnum = icntl;
454 
455   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68",
456     "Message level information",
457     "None",
458     mat_mkl_cpardiso->msglvl,
459     &icntl,
460     &flg);CHKERRQ(ierr);
461   if (flg) mat_mkl_cpardiso->msglvl = icntl;
462 
463   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69",
464     "Defines the matrix type",
465     "None",
466     mat_mkl_cpardiso->mtype,
467     &icntl,
468     &flg);CHKERRQ(ierr);
469   if(flg){
470     mat_mkl_cpardiso->mtype = icntl;
471 #if defined(PETSC_USE_REAL_SINGLE)
472     mat_mkl_cpardiso->iparm[27] = 1;
473 #else
474     mat_mkl_cpardiso->iparm[27] = 0;
475 #endif
476     mat_mkl_cpardiso->iparm[34] = 1;
477   }
478   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1",
479     "Use default values",
480     "None",
481     mat_mkl_cpardiso->iparm[0],
482     &icntl,
483     &flg);CHKERRQ(ierr);
484 
485   if(flg && icntl != 0){
486     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2",
487       "Fill-in reducing ordering for the input matrix",
488       "None",
489       mat_mkl_cpardiso->iparm[1],
490       &icntl,
491       &flg);CHKERRQ(ierr);
492     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
493 
494     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4",
495       "Preconditioned CGS/CG",
496       "None",
497       mat_mkl_cpardiso->iparm[3],
498       &icntl,
499       &flg);CHKERRQ(ierr);
500     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
501 
502     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5",
503       "User permutation",
504       "None",
505       mat_mkl_cpardiso->iparm[4],
506       &icntl,
507       &flg);CHKERRQ(ierr);
508     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
509 
510     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6",
511       "Write solution on x",
512       "None",
513       mat_mkl_cpardiso->iparm[5],
514       &icntl,
515       &flg);CHKERRQ(ierr);
516     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
517 
518     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8",
519       "Iterative refinement step",
520       "None",
521       mat_mkl_cpardiso->iparm[7],
522       &icntl,
523       &flg);CHKERRQ(ierr);
524     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
525 
526     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10",
527       "Pivoting perturbation",
528       "None",
529       mat_mkl_cpardiso->iparm[9],
530       &icntl,
531       &flg);CHKERRQ(ierr);
532     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
533 
534     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11",
535       "Scaling vectors",
536       "None",
537       mat_mkl_cpardiso->iparm[10],
538       &icntl,
539       &flg);CHKERRQ(ierr);
540     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
541 
542     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12",
543       "Solve with transposed or conjugate transposed matrix A",
544       "None",
545       mat_mkl_cpardiso->iparm[11],
546       &icntl,
547       &flg);CHKERRQ(ierr);
548     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
549 
550     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13",
551       "Improved accuracy using (non-) symmetric weighted matching",
552       "None",
553       mat_mkl_cpardiso->iparm[12],
554       &icntl,
555       &flg);CHKERRQ(ierr);
556     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
557 
558     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18",
559       "Numbers of non-zero elements",
560       "None",
561       mat_mkl_cpardiso->iparm[17],
562       &icntl,
563       &flg);CHKERRQ(ierr);
564     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
565 
566     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19",
567       "Report number of floating point operations",
568       "None",
569       mat_mkl_cpardiso->iparm[18],
570       &icntl,
571       &flg);CHKERRQ(ierr);
572     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
573 
574     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21",
575       "Pivoting for symmetric indefinite matrices",
576       "None",
577       mat_mkl_cpardiso->iparm[20],
578       &icntl,
579       &flg);CHKERRQ(ierr);
580     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
581 
582     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24",
583       "Parallel factorization control",
584       "None",
585       mat_mkl_cpardiso->iparm[23],
586       &icntl,
587       &flg);CHKERRQ(ierr);
588     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
589 
590     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25",
591       "Parallel forward/backward solve control",
592       "None",
593       mat_mkl_cpardiso->iparm[24],
594       &icntl,
595       &flg);CHKERRQ(ierr);
596     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
597 
598     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27",
599       "Matrix checker",
600       "None",
601       mat_mkl_cpardiso->iparm[26],
602       &icntl,
603       &flg);CHKERRQ(ierr);
604     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
605 
606     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31",
607       "Partial solve and computing selected components of the solution vectors",
608       "None",
609       mat_mkl_cpardiso->iparm[30],
610       &icntl,
611       &flg);CHKERRQ(ierr);
612     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
613 
614     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34",
615       "Optimal number of threads for conditional numerical reproducibility (CNR) mode",
616       "None",
617       mat_mkl_cpardiso->iparm[33],
618       &icntl,
619       &flg);CHKERRQ(ierr);
620     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
621 
622     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60",
623       "Intel MKL_CPARDISO mode",
624       "None",
625       mat_mkl_cpardiso->iparm[59],
626       &icntl,
627       &flg);CHKERRQ(ierr);
628     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
629   }
630 
631   PetscOptionsEnd();
632   PetscFunctionReturn(0);
633 }
634 
635 #undef __FUNCT__
636 #define __FUNCT__ "PetscInitialize_MKL_CPARDISO"
637 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
638 {
639   PetscErrorCode  ierr;
640   PetscInt        i;
641   PetscMPIInt     size;
642 
643   PetscFunctionBegin;
644 
645   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
646   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
647 
648   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
649   mat_mkl_cpardiso->maxfct = 1;
650   mat_mkl_cpardiso->mnum = 1;
651   mat_mkl_cpardiso->n = A->rmap->N;
652   mat_mkl_cpardiso->msglvl = 0;
653   mat_mkl_cpardiso->nrhs = 1;
654   mat_mkl_cpardiso->err = 0;
655   mat_mkl_cpardiso->phase = -1;
656 #if defined(PETSC_USE_COMPLEX)
657   mat_mkl_cpardiso->mtype = 13;
658 #else
659   mat_mkl_cpardiso->mtype = 11;
660 #endif
661 
662 #if defined(PETSC_USE_REAL_SINGLE)
663   mat_mkl_cpardiso->iparm[27] = 1;
664 #else
665   mat_mkl_cpardiso->iparm[27] = 0;
666 #endif
667 
668   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
669 
670   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
671   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
672   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
673   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
674   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
675   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
676   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
677   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
678   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
679   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
680 
681   mat_mkl_cpardiso->iparm[39] = 0;
682   if (size > 1) {
683     mat_mkl_cpardiso->iparm[39] = 2;
684     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
685     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
686   }
687   mat_mkl_cpardiso->perm = 0;
688   PetscFunctionReturn(0);
689 }
690 
691 /*
692  * Symbolic decomposition. Mkl_Pardiso analysis phase.
693  */
694 #undef __FUNCT__
695 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_CPARDISO"
696 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
697 {
698   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr;
699   PetscErrorCode  ierr;
700 
701   PetscFunctionBegin;
702   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
703 
704   /* Set MKL_CPARDISO options from the options database */
705   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
706 
707   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
708 
709   mat_mkl_cpardiso->n = A->rmap->N;
710 
711   /* analysis phase */
712   /*----------------*/
713   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
714 
715   cluster_sparse_solver (
716     mat_mkl_cpardiso->pt,
717     &mat_mkl_cpardiso->maxfct,
718     &mat_mkl_cpardiso->mnum,
719     &mat_mkl_cpardiso->mtype,
720     &mat_mkl_cpardiso->phase,
721     &mat_mkl_cpardiso->n,
722     mat_mkl_cpardiso->a,
723     mat_mkl_cpardiso->ia,
724     mat_mkl_cpardiso->ja,
725     mat_mkl_cpardiso->perm,
726     &mat_mkl_cpardiso->nrhs,
727     mat_mkl_cpardiso->iparm,
728     &mat_mkl_cpardiso->msglvl,
729     NULL,
730     NULL,
731     &mat_mkl_cpardiso->comm_mkl_cpardiso,
732     &mat_mkl_cpardiso->err);
733 
734   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
735 
736   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
737   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
738   F->ops->solve           = MatSolve_MKL_CPARDISO;
739   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
740   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatView_MKL_CPARDISO"
746 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
747 {
748   PetscErrorCode    ierr;
749   PetscBool         iascii;
750   PetscViewerFormat format;
751   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
752   PetscInt          i;
753 
754   PetscFunctionBegin;
755   /* check if matrix is mkl_cpardiso type */
756   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
757 
758   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
759   if (iascii) {
760     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
761     if (format == PETSC_VIEWER_ASCII_INFO) {
762       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
763       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
764       for(i = 1; i <= 64; i++){
765         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
766       }
767       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
768       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
769       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
770       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
771       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
772       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
773     }
774   }
775   PetscFunctionReturn(0);
776 }
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatGetInfo_MKL_CPARDISO"
780 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
781 {
782   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->spptr;
783 
784   PetscFunctionBegin;
785   info->block_size        = 1.0;
786   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
787   info->nz_unneeded       = 0.0;
788   info->assemblies        = 0.0;
789   info->mallocs           = 0.0;
790   info->memory            = 0.0;
791   info->fill_ratio_given  = 0;
792   info->fill_ratio_needed = 0;
793   info->factor_mallocs    = 0;
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatMkl_CPardisoSetCntl_MKL_CPARDISO"
799 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
800 {
801   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->spptr;
802 
803   PetscFunctionBegin;
804   if(icntl <= 64){
805     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
806   } else {
807     if(icntl == 65)
808       mkl_set_num_threads((int)ival);
809     else if(icntl == 66)
810       mat_mkl_cpardiso->maxfct = ival;
811     else if(icntl == 67)
812       mat_mkl_cpardiso->mnum = ival;
813     else if(icntl == 68)
814       mat_mkl_cpardiso->msglvl = ival;
815     else if(icntl == 69){
816       int pt[IPARM_SIZE];
817       mat_mkl_cpardiso->mtype = ival;
818 #if defined(PETSC_USE_REAL_SINGLE)
819       mat_mkl_cpardiso->iparm[27] = 1;
820 #else
821       mat_mkl_cpardiso->iparm[27] = 0;
822 #endif
823       mat_mkl_cpardiso->iparm[34] = 1;
824     }
825   }
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatMkl_CPardisoSetCntl"
831 /*@
832   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
833 
834    Logically Collective on Mat
835 
836    Input Parameters:
837 +  F - the factored matrix obtained by calling MatGetFactor()
838 .  icntl - index of Mkl_Pardiso parameter
839 -  ival - value of Mkl_Pardiso parameter
840 
841   Options Database:
842 .   -mat_mkl_cpardiso_<icntl> <ival>
843 
844    Level: beginner
845 
846    References:
847 .      Mkl_Pardiso Users' Guide
848 
849 .seealso: MatGetFactor()
850 @*/
851 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
852 {
853   PetscErrorCode ierr;
854 
855   PetscFunctionBegin;
856   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_cpardiso"
862 static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type)
863 {
864   PetscFunctionBegin;
865   *type = MATSOLVERMKL_CPARDISO;
866   PetscFunctionReturn(0);
867 }
868 
869 /* MatGetFactor for MPI AIJ matrices */
870 #undef __FUNCT__
871 #define __FUNCT__ "MatGetFactor_mpiaij_mkl_cpardiso"
872 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
873 {
874   Mat              B;
875   PetscErrorCode   ierr;
876   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
877   PetscBool        isSeqAIJ;
878 
879   PetscFunctionBegin;
880   /* Create the factorization matrix */
881 
882   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
883   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
884   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
885   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
886 
887   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
888 
889   if (isSeqAIJ) {
890     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
891   mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
892   } else {
893     mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
894     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
895   }
896 
897   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
898   B->ops->destroy = MatDestroy_MKL_CPARDISO;
899 
900   B->ops->view    = MatView_MKL_CPARDISO;
901   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
902 
903   B->factortype   = ftype;
904   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
905 
906   B->spptr = mat_mkl_cpardiso;
907 
908   /* set solvertype */
909   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
910   ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr);
911 
912   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);CHKERRQ(ierr);
913   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
914   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
915 
916   *F = B;
917   PetscFunctionReturn(0);
918 }
919 
920 #undef __FUNCT__
921 #define __FUNCT__ "MatSolverPackageRegister_MKL_CPardiso"
922 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void)
923 {
924   PetscErrorCode ierr;
925 
926   PetscFunctionBegin;
927   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
928   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
929   PetscFunctionReturn(0);
930 }
931