xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision 37eeb8152ec6a2cf24186d3591c2c5de5dfd8fa5)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>                       /*I "petscmat.h" I*/
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
12 #define MKL_ILP64
13 #endif
14 #include <mkl.h>
15 #include <mkl_cluster_sparse_solver.h>
16 
17 /*
18  *  Possible mkl_cpardiso phases that controls the execution of the solver.
19  *  For more information check mkl_cpardiso manual.
20  */
21 #define JOB_ANALYSIS 11
22 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
23 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
24 #define JOB_NUMERICAL_FACTORIZATION 22
25 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
26 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
27 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
28 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
29 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
30 #define JOB_RELEASE_OF_LU_MEMORY 0
31 #define JOB_RELEASE_OF_ALL_MEMORY -1
32 
33 #define IPARM_SIZE 64
34 #define INT_TYPE MKL_INT
35 
36 static const char *Err_MSG_CPardiso(int errNo){
37   switch (errNo) {
38     case -1:
39       return "input inconsistent"; break;
40     case -2:
41       return "not enough memory"; break;
42     case -3:
43       return "reordering problem"; break;
44     case -4:
45       return "zero pivot, numerical factorization or iterative refinement problem"; break;
46     case -5:
47       return "unclassified (internal) error"; break;
48     case -6:
49       return "preordering failed (matrix types 11, 13 only)"; break;
50     case -7:
51       return "diagonal matrix problem"; break;
52     case -8:
53       return "32-bit integer overflow problem"; break;
54     case -9:
55       return "not enough memory for OOC"; break;
56     case -10:
57       return "problems with opening OOC temporary files"; break;
58     case -11:
59       return "read/write problems with the OOC data file"; break;
60     default :
61       return "unknown error";
62   }
63 }
64 
65 /*
66  *  Internal data structure.
67  *  For more information check mkl_cpardiso manual.
68  */
69 
70 typedef struct {
71 
72   /* Configuration vector */
73   INT_TYPE     iparm[IPARM_SIZE];
74 
75   /*
76    * Internal mkl_cpardiso memory location.
77    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
78    */
79   void         *pt[IPARM_SIZE];
80 
81   MPI_Comm     comm_mkl_cpardiso;
82 
83   /* Basic mkl_cpardiso info*/
84   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
85 
86   /* Matrix structure */
87   PetscScalar  *a;
88 
89   INT_TYPE     *ia, *ja;
90 
91   /* Number of non-zero elements */
92   INT_TYPE     nz;
93 
94   /* Row permutaton vector*/
95   INT_TYPE     *perm;
96 
97   /* Define is matrix preserve sparce structure. */
98   MatStructure matstruc;
99 
100   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
101 
102   /* True if mkl_cpardiso function have been used. */
103   PetscBool CleanUp;
104 } Mat_MKL_CPARDISO;
105 
106 /*
107  * Copy the elements of matrix A.
108  * Input:
109  *   - Mat A: MATSEQAIJ matrix
110  *   - int shift: matrix index.
111  *     - 0 for c representation
112  *     - 1 for fortran representation
113  *   - MatReuse reuse:
114  *     - MAT_INITIAL_MATRIX: Create a new aij representation
115  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
116  * Output:
117  *   - int *nnz: Number of nonzero-elements.
118  *   - int **r pointer to i index
119  *   - int **c pointer to j elements
120  *   - MATRIXTYPE **v: Non-zero elements
121  */
122 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
123 {
124   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
125 
126   PetscFunctionBegin;
127   *v=aa->a;
128   if (reuse == MAT_INITIAL_MATRIX) {
129     *r   = (INT_TYPE*)aa->i;
130     *c   = (INT_TYPE*)aa->j;
131     *nnz = aa->nz;
132   }
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
137 {
138   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
139   PetscErrorCode    ierr;
140   PetscInt          rstart,nz,i,j,countA,countB;
141   PetscInt          *row,*col;
142   const PetscScalar *av, *bv;
143   PetscScalar       *val;
144   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
145   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
146   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
147   PetscInt          colA_start,jB,jcol;
148 
149   PetscFunctionBegin;
150   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
151   av=aa->a; bv=bb->a;
152 
153   garray = mat->garray;
154 
155   if (reuse == MAT_INITIAL_MATRIX) {
156     nz   = aa->nz + bb->nz;
157     *nnz = nz;
158     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
159     col  = row + m + 1;
160     val  = (PetscScalar*)(col + nz);
161     *r = row; *c = col; *v = val;
162     row[0] = 0;
163   } else {
164     row = *r; col = *c; val = *v;
165   }
166 
167   nz = 0;
168   for (i=0; i<m; i++) {
169     row[i] = nz;
170     countA     = ai[i+1] - ai[i];
171     countB     = bi[i+1] - bi[i];
172     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
173     bjj        = bj + bi[i];
174 
175     /* B part, smaller col index */
176     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
177     jB         = 0;
178     for (j=0; j<countB; j++) {
179       jcol = garray[bjj[j]];
180       if (jcol > colA_start) {
181         jB = j;
182         break;
183       }
184       col[nz]   = jcol;
185       val[nz++] = *bv++;
186       if (j==countB-1) jB = countB;
187     }
188 
189     /* A part */
190     for (j=0; j<countA; j++) {
191       col[nz]   = rstart + ajj[j];
192       val[nz++] = *av++;
193     }
194 
195     /* B part, larger col index */
196     for (j=jB; j<countB; j++) {
197       col[nz]   = garray[bjj[j]];
198       val[nz++] = *bv++;
199     }
200   }
201   row[m] = nz;
202 
203   PetscFunctionReturn(0);
204 }
205 
206 /*
207  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
208  */
209 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
210 {
211   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
212   PetscErrorCode   ierr;
213 
214   PetscFunctionBegin;
215   /* Terminate instance, deallocate memories */
216   if (mat_mkl_cpardiso->CleanUp) {
217     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
218 
219     cluster_sparse_solver (
220       mat_mkl_cpardiso->pt,
221       &mat_mkl_cpardiso->maxfct,
222       &mat_mkl_cpardiso->mnum,
223       &mat_mkl_cpardiso->mtype,
224       &mat_mkl_cpardiso->phase,
225       &mat_mkl_cpardiso->n,
226       NULL,
227       NULL,
228       NULL,
229       mat_mkl_cpardiso->perm,
230       &mat_mkl_cpardiso->nrhs,
231       mat_mkl_cpardiso->iparm,
232       &mat_mkl_cpardiso->msglvl,
233       NULL,
234       NULL,
235       &mat_mkl_cpardiso->comm_mkl_cpardiso,
236       (PetscInt*)&mat_mkl_cpardiso->err);
237   }
238 
239   if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) {
240     ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr);
241   }
242   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
243   ierr = PetscFree(A->data);CHKERRQ(ierr);
244 
245   /* clear composed functions */
246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
247   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 /*
252  * Computes Ax = b
253  */
254 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
255 {
256   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
257   PetscErrorCode    ierr;
258   PetscScalar       *xarray;
259   const PetscScalar *barray;
260 
261   PetscFunctionBegin;
262   mat_mkl_cpardiso->nrhs = 1;
263   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
264   ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
265 
266   /* solve phase */
267   /*-------------*/
268   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
269   cluster_sparse_solver (
270     mat_mkl_cpardiso->pt,
271     &mat_mkl_cpardiso->maxfct,
272     &mat_mkl_cpardiso->mnum,
273     &mat_mkl_cpardiso->mtype,
274     &mat_mkl_cpardiso->phase,
275     &mat_mkl_cpardiso->n,
276     mat_mkl_cpardiso->a,
277     mat_mkl_cpardiso->ia,
278     mat_mkl_cpardiso->ja,
279     mat_mkl_cpardiso->perm,
280     &mat_mkl_cpardiso->nrhs,
281     mat_mkl_cpardiso->iparm,
282     &mat_mkl_cpardiso->msglvl,
283     (void*)barray,
284     (void*)xarray,
285     &mat_mkl_cpardiso->comm_mkl_cpardiso,
286     (PetscInt*)&mat_mkl_cpardiso->err);
287 
288   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
289 
290   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
291   ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
292   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
293   PetscFunctionReturn(0);
294 }
295 
296 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
297 {
298   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
299   PetscErrorCode   ierr;
300 
301   PetscFunctionBegin;
302 #if defined(PETSC_USE_COMPLEX)
303   mat_mkl_cpardiso->iparm[12 - 1] = 1;
304 #else
305   mat_mkl_cpardiso->iparm[12 - 1] = 2;
306 #endif
307   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
308   mat_mkl_cpardiso->iparm[12 - 1] = 0;
309   PetscFunctionReturn(0);
310 }
311 
312 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
313 {
314   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
315   PetscErrorCode    ierr;
316   PetscScalar       *xarray;
317   const PetscScalar *barray;
318 
319   PetscFunctionBegin;
320   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
321 
322   if(mat_mkl_cpardiso->nrhs > 0){
323     ierr = MatDenseGetArrayRead(B,&barray);
324     ierr = MatDenseGetArray(X,&xarray);
325 
326     /* solve phase */
327     /*-------------*/
328     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
329     cluster_sparse_solver (
330       mat_mkl_cpardiso->pt,
331       &mat_mkl_cpardiso->maxfct,
332       &mat_mkl_cpardiso->mnum,
333       &mat_mkl_cpardiso->mtype,
334       &mat_mkl_cpardiso->phase,
335       &mat_mkl_cpardiso->n,
336       mat_mkl_cpardiso->a,
337       mat_mkl_cpardiso->ia,
338       mat_mkl_cpardiso->ja,
339       mat_mkl_cpardiso->perm,
340       &mat_mkl_cpardiso->nrhs,
341       mat_mkl_cpardiso->iparm,
342       &mat_mkl_cpardiso->msglvl,
343       (void*)barray,
344       (void*)xarray,
345       &mat_mkl_cpardiso->comm_mkl_cpardiso,
346       (PetscInt*)&mat_mkl_cpardiso->err);
347     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
348     ierr = MatDenseRestoreArrayRead(B,&barray);
349     ierr = MatDenseRestoreArray(X,&xarray);
350 
351   }
352   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
353   PetscFunctionReturn(0);
354 
355 }
356 
357 /*
358  * LU Decomposition
359  */
360 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
361 {
362   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
363   PetscErrorCode   ierr;
364 
365   PetscFunctionBegin;
366   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
367   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
368 
369   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
370   cluster_sparse_solver (
371     mat_mkl_cpardiso->pt,
372     &mat_mkl_cpardiso->maxfct,
373     &mat_mkl_cpardiso->mnum,
374     &mat_mkl_cpardiso->mtype,
375     &mat_mkl_cpardiso->phase,
376     &mat_mkl_cpardiso->n,
377     mat_mkl_cpardiso->a,
378     mat_mkl_cpardiso->ia,
379     mat_mkl_cpardiso->ja,
380     mat_mkl_cpardiso->perm,
381     &mat_mkl_cpardiso->nrhs,
382     mat_mkl_cpardiso->iparm,
383     &mat_mkl_cpardiso->msglvl,
384     NULL,
385     NULL,
386     &mat_mkl_cpardiso->comm_mkl_cpardiso,
387     &mat_mkl_cpardiso->err);
388   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
389 
390   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
391   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
392   PetscFunctionReturn(0);
393 }
394 
395 /* Sets mkl_cpardiso options from the options database */
396 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
397 {
398   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
399   PetscErrorCode      ierr;
400   PetscInt            icntl,threads;
401   PetscBool           flg;
402 
403   PetscFunctionBegin;
404   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
405   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr);
406   if (flg) mkl_set_num_threads((int)threads);
407 
408   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr);
409   if (flg) mat_mkl_cpardiso->maxfct = icntl;
410 
411   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr);
412   if (flg) mat_mkl_cpardiso->mnum = icntl;
413 
414   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr);
415   if (flg) mat_mkl_cpardiso->msglvl = icntl;
416 
417   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr);
418   if(flg){
419     mat_mkl_cpardiso->mtype = icntl;
420 #if defined(PETSC_USE_REAL_SINGLE)
421     mat_mkl_cpardiso->iparm[27] = 1;
422 #else
423     mat_mkl_cpardiso->iparm[27] = 0;
424 #endif
425     mat_mkl_cpardiso->iparm[34] = 1;
426   }
427   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr);
428 
429   if(flg && icntl != 0){
430     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr);
431     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
432 
433     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr);
434     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
435 
436     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr);
437     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
438 
439     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr);
440     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
441 
442     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr);
443     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
444 
445     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr);
446     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
447 
448     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr);
449     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
450 
451     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr);
452     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
453 
454     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
455       &flg);CHKERRQ(ierr);
456     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
457 
458     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
459       &flg);CHKERRQ(ierr);
460     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
461 
462     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr);
463     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
464 
465     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr);
466     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
467 
468     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr);
469     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
470 
471     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr);
472     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
473 
474     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr);
475     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
476 
477     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr);
478     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
479 
480     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr);
481     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
482 
483     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr);
484     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
485   }
486 
487   ierr = PetscOptionsEnd();CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
492 {
493   PetscErrorCode  ierr;
494   PetscMPIInt     size;
495 
496   PetscFunctionBegin;
497 
498   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
499   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
500 
501   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
502   mat_mkl_cpardiso->maxfct = 1;
503   mat_mkl_cpardiso->mnum = 1;
504   mat_mkl_cpardiso->n = A->rmap->N;
505   mat_mkl_cpardiso->msglvl = 0;
506   mat_mkl_cpardiso->nrhs = 1;
507   mat_mkl_cpardiso->err = 0;
508   mat_mkl_cpardiso->phase = -1;
509 #if defined(PETSC_USE_COMPLEX)
510   mat_mkl_cpardiso->mtype = 13;
511 #else
512   mat_mkl_cpardiso->mtype = 11;
513 #endif
514 
515 #if defined(PETSC_USE_REAL_SINGLE)
516   mat_mkl_cpardiso->iparm[27] = 1;
517 #else
518   mat_mkl_cpardiso->iparm[27] = 0;
519 #endif
520 
521   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
522 
523   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
524   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
525   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
526   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
527   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
528   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
529   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
530   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
531   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
532   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
533 
534   mat_mkl_cpardiso->iparm[39] = 0;
535   if (size > 1) {
536     mat_mkl_cpardiso->iparm[39] = 2;
537     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
538     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
539   }
540   mat_mkl_cpardiso->perm = 0;
541   PetscFunctionReturn(0);
542 }
543 
544 /*
545  * Symbolic decomposition. Mkl_Pardiso analysis phase.
546  */
547 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
548 {
549   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
550   PetscErrorCode  ierr;
551 
552   PetscFunctionBegin;
553   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
554 
555   /* Set MKL_CPARDISO options from the options database */
556   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
557   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
558 
559   mat_mkl_cpardiso->n = A->rmap->N;
560 
561   /* analysis phase */
562   /*----------------*/
563   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
564 
565   cluster_sparse_solver (
566     mat_mkl_cpardiso->pt,
567     &mat_mkl_cpardiso->maxfct,
568     &mat_mkl_cpardiso->mnum,
569     &mat_mkl_cpardiso->mtype,
570     &mat_mkl_cpardiso->phase,
571     &mat_mkl_cpardiso->n,
572     mat_mkl_cpardiso->a,
573     mat_mkl_cpardiso->ia,
574     mat_mkl_cpardiso->ja,
575     mat_mkl_cpardiso->perm,
576     &mat_mkl_cpardiso->nrhs,
577     mat_mkl_cpardiso->iparm,
578     &mat_mkl_cpardiso->msglvl,
579     NULL,
580     NULL,
581     &mat_mkl_cpardiso->comm_mkl_cpardiso,
582     (PetscInt*)&mat_mkl_cpardiso->err);
583 
584   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
585 
586   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
587   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
588   F->ops->solve           = MatSolve_MKL_CPARDISO;
589   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
590   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
591   PetscFunctionReturn(0);
592 }
593 
594 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
595 {
596   PetscErrorCode    ierr;
597   PetscBool         iascii;
598   PetscViewerFormat format;
599   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
600   PetscInt          i;
601 
602   PetscFunctionBegin;
603   /* check if matrix is mkl_cpardiso type */
604   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
605 
606   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
607   if (iascii) {
608     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
609     if (format == PETSC_VIEWER_ASCII_INFO) {
610       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
611       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
612       for(i = 1; i <= 64; i++){
613         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
614       }
615       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
616       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
617       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
618       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
619       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
620       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
627 {
628   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data;
629 
630   PetscFunctionBegin;
631   info->block_size        = 1.0;
632   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
633   info->nz_unneeded       = 0.0;
634   info->assemblies        = 0.0;
635   info->mallocs           = 0.0;
636   info->memory            = 0.0;
637   info->fill_ratio_given  = 0;
638   info->fill_ratio_needed = 0;
639   info->factor_mallocs    = 0;
640   PetscFunctionReturn(0);
641 }
642 
643 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
644 {
645   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data;
646 
647   PetscFunctionBegin;
648   if(icntl <= 64){
649     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
650   } else {
651     if(icntl == 65) mkl_set_num_threads((int)ival);
652     else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival;
653     else if(icntl == 67) mat_mkl_cpardiso->mnum = ival;
654     else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival;
655     else if(icntl == 69){
656       mat_mkl_cpardiso->mtype = ival;
657 #if defined(PETSC_USE_REAL_SINGLE)
658       mat_mkl_cpardiso->iparm[27] = 1;
659 #else
660       mat_mkl_cpardiso->iparm[27] = 0;
661 #endif
662       mat_mkl_cpardiso->iparm[34] = 1;
663     }
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 /*@
669   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
670 
671    Logically Collective on Mat
672 
673    Input Parameters:
674 +  F - the factored matrix obtained by calling MatGetFactor()
675 .  icntl - index of Mkl_Pardiso parameter
676 -  ival - value of Mkl_Pardiso parameter
677 
678   Options Database:
679 .   -mat_mkl_cpardiso_<icntl> <ival>
680 
681    Level: Intermediate
682 
683    Notes:
684     This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options
685           database approach when working with TS, SNES, or KSP.
686 
687    References:
688 .      Mkl_Pardiso Users' Guide
689 
690 .seealso: MatGetFactor()
691 @*/
692 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
693 {
694   PetscErrorCode ierr;
695 
696   PetscFunctionBegin;
697   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type)
702 {
703   PetscFunctionBegin;
704   *type = MATSOLVERMKL_CPARDISO;
705   PetscFunctionReturn(0);
706 }
707 
708 /* MatGetFactor for MPI AIJ matrices */
709 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
710 {
711   Mat              B;
712   PetscErrorCode   ierr;
713   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
714   PetscBool        isSeqAIJ;
715 
716   PetscFunctionBegin;
717   /* Create the factorization matrix */
718 
719   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
720   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
721   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
722   ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr);
723   ierr = MatSetUp(B);CHKERRQ(ierr);
724 
725   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
726 
727   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
728   else          mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
729 
730   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
731   B->ops->destroy = MatDestroy_MKL_CPARDISO;
732 
733   B->ops->view    = MatView_MKL_CPARDISO;
734   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
735 
736   B->factortype   = ftype;
737   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
738 
739   B->data = mat_mkl_cpardiso;
740 
741   /* set solvertype */
742   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
743   ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr);
744 
745   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);CHKERRQ(ierr);
746   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
747   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
748 
749   *F = B;
750   PetscFunctionReturn(0);
751 }
752 
753 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void)
754 {
755   PetscErrorCode ierr;
756 
757   PetscFunctionBegin;
758   ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
759   ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
760   PetscFunctionReturn(0);
761 }
762