xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision 5e71baeff2f3138f93cd4f5927dfd596eb8325cc)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>                       /*I "petscmat.h" I*/
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #include <mkl.h>
12 #include <mkl_cluster_sparse_solver.h>
13 
14 /*
15  *  Possible mkl_cpardiso phases that controls the execution of the solver.
16  *  For more information check mkl_cpardiso manual.
17  */
18 #define JOB_ANALYSIS 11
19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
21 #define JOB_NUMERICAL_FACTORIZATION 22
22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
27 #define JOB_RELEASE_OF_LU_MEMORY 0
28 #define JOB_RELEASE_OF_ALL_MEMORY -1
29 
30 #define IPARM_SIZE 64
31 #define INT_TYPE MKL_INT
32 
33 static const char *Err_MSG_CPardiso(int errNo){
34   switch (errNo) {
35     case -1:
36       return "input inconsistent"; break;
37     case -2:
38       return "not enough memory"; break;
39     case -3:
40       return "reordering problem"; break;
41     case -4:
42       return "zero pivot, numerical factorization or iterative refinement problem"; break;
43     case -5:
44       return "unclassified (internal) error"; break;
45     case -6:
46       return "preordering failed (matrix types 11, 13 only)"; break;
47     case -7:
48       return "diagonal matrix problem"; break;
49     case -8:
50       return "32-bit integer overflow problem"; break;
51     case -9:
52       return "not enough memory for OOC"; break;
53     case -10:
54       return "problems with opening OOC temporary files"; break;
55     case -11:
56       return "read/write problems with the OOC data file"; break;
57     default :
58       return "unknown error";
59   }
60 }
61 
62 /*
63  *  Internal data structure.
64  *  For more information check mkl_cpardiso manual.
65  */
66 
67 typedef struct {
68 
69   /* Configuration vector */
70   INT_TYPE     iparm[IPARM_SIZE];
71 
72   /*
73    * Internal mkl_cpardiso memory location.
74    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
75    */
76   void         *pt[IPARM_SIZE];
77 
78   MPI_Comm     comm_mkl_cpardiso;
79 
80   /* Basic mkl_cpardiso info*/
81   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
82 
83   /* Matrix structure */
84   PetscScalar  *a;
85 
86   INT_TYPE     *ia, *ja;
87 
88   /* Number of non-zero elements */
89   INT_TYPE     nz;
90 
91   /* Row permutaton vector*/
92   INT_TYPE     *perm;
93 
94   /* Define is matrix preserve sparce structure. */
95   MatStructure matstruc;
96 
97   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
98 
99   /* True if mkl_cpardiso function have been used. */
100   PetscBool CleanUp;
101 } Mat_MKL_CPARDISO;
102 
103 /*
104  * Copy the elements of matrix A.
105  * Input:
106  *   - Mat A: MATSEQAIJ matrix
107  *   - int shift: matrix index.
108  *     - 0 for c representation
109  *     - 1 for fortran representation
110  *   - MatReuse reuse:
111  *     - MAT_INITIAL_MATRIX: Create a new aij representation
112  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
113  * Output:
114  *   - int *nnz: Number of nonzero-elements.
115  *   - int **r pointer to i index
116  *   - int **c pointer to j elements
117  *   - MATRIXTYPE **v: Non-zero elements
118  */
119 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
120 {
121   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
122 
123   PetscFunctionBegin;
124   *v=aa->a;
125   if (reuse == MAT_INITIAL_MATRIX) {
126     *r   = (INT_TYPE*)aa->i;
127     *c   = (INT_TYPE*)aa->j;
128     *nnz = aa->nz;
129   }
130   PetscFunctionReturn(0);
131 }
132 
133 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
134 {
135   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
136   PetscErrorCode    ierr;
137   PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
138   PetscInt          *row,*col;
139   const PetscScalar *av, *bv,*v1,*v2;
140   PetscScalar       *val;
141   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
142   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
143   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
144   PetscInt          nn, colA_start,jB,jcol;
145 
146   PetscFunctionBegin;
147   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
148   av=aa->a; bv=bb->a;
149 
150   garray = mat->garray;
151 
152   if (reuse == MAT_INITIAL_MATRIX) {
153     nz   = aa->nz + bb->nz;
154     *nnz = nz;
155     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
156     col  = row + m + 1;
157     val  = (PetscScalar*)(col + nz);
158     *r = row; *c = col; *v = val;
159     row[0] = 0;
160   } else {
161     row = *r; col = *c; val = *v;
162   }
163 
164   nz = 0;
165   for (i=0; i<m; i++) {
166     row[i] = nz;
167     countA     = ai[i+1] - ai[i];
168     countB     = bi[i+1] - bi[i];
169     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
170     bjj        = bj + bi[i];
171 
172     /* B part, smaller col index */
173     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
174     jB         = 0;
175     for (j=0; j<countB; j++) {
176       jcol = garray[bjj[j]];
177       if (jcol > colA_start) {
178         jB = j;
179         break;
180       }
181       col[nz]   = jcol;
182       val[nz++] = *bv++;
183       if (j==countB-1) jB = countB;
184     }
185 
186     /* A part */
187     for (j=0; j<countA; j++) {
188       col[nz]   = rstart + ajj[j];
189       val[nz++] = *av++;
190     }
191 
192     /* B part, larger col index */
193     for (j=jB; j<countB; j++) {
194       col[nz]   = garray[bjj[j]];
195       val[nz++] = *bv++;
196     }
197   }
198   row[m] = nz;
199 
200   PetscFunctionReturn(0);
201 }
202 
203 /*
204  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
205  */
206 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
207 {
208   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
209   PetscErrorCode   ierr;
210 
211   PetscFunctionBegin;
212   /* Terminate instance, deallocate memories */
213   if (mat_mkl_cpardiso->CleanUp) {
214     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
215 
216     cluster_sparse_solver (
217       mat_mkl_cpardiso->pt,
218       &mat_mkl_cpardiso->maxfct,
219       &mat_mkl_cpardiso->mnum,
220       &mat_mkl_cpardiso->mtype,
221       &mat_mkl_cpardiso->phase,
222       &mat_mkl_cpardiso->n,
223       NULL,
224       NULL,
225       NULL,
226       mat_mkl_cpardiso->perm,
227       &mat_mkl_cpardiso->nrhs,
228       mat_mkl_cpardiso->iparm,
229       &mat_mkl_cpardiso->msglvl,
230       NULL,
231       NULL,
232       &mat_mkl_cpardiso->comm_mkl_cpardiso,
233       &mat_mkl_cpardiso->err);
234   }
235 
236   if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) {
237     ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr);
238   }
239   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
240   ierr = PetscFree(A->data);CHKERRQ(ierr);
241 
242   /* clear composed functions */
243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
244   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 /*
249  * Computes Ax = b
250  */
251 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
252 {
253   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
254   PetscErrorCode    ierr;
255   PetscScalar       *xarray;
256   const PetscScalar *barray;
257 
258   PetscFunctionBegin;
259   mat_mkl_cpardiso->nrhs = 1;
260   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
261   ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
262 
263   /* solve phase */
264   /*-------------*/
265   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
266   cluster_sparse_solver (
267     mat_mkl_cpardiso->pt,
268     &mat_mkl_cpardiso->maxfct,
269     &mat_mkl_cpardiso->mnum,
270     &mat_mkl_cpardiso->mtype,
271     &mat_mkl_cpardiso->phase,
272     &mat_mkl_cpardiso->n,
273     mat_mkl_cpardiso->a,
274     mat_mkl_cpardiso->ia,
275     mat_mkl_cpardiso->ja,
276     mat_mkl_cpardiso->perm,
277     &mat_mkl_cpardiso->nrhs,
278     mat_mkl_cpardiso->iparm,
279     &mat_mkl_cpardiso->msglvl,
280     (void*)barray,
281     (void*)xarray,
282     &mat_mkl_cpardiso->comm_mkl_cpardiso,
283     &mat_mkl_cpardiso->err);
284 
285   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
286 
287   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
288   ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
289   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
290   PetscFunctionReturn(0);
291 }
292 
293 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
294 {
295   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
296   PetscErrorCode   ierr;
297 
298   PetscFunctionBegin;
299 #if defined(PETSC_USE_COMPLEX)
300   mat_mkl_cpardiso->iparm[12 - 1] = 1;
301 #else
302   mat_mkl_cpardiso->iparm[12 - 1] = 2;
303 #endif
304   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
305   mat_mkl_cpardiso->iparm[12 - 1] = 0;
306   PetscFunctionReturn(0);
307 }
308 
309 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
310 {
311   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
312   PetscErrorCode    ierr;
313   PetscScalar       *barray, *xarray;
314   PetscBool         flg;
315 
316   PetscFunctionBegin;
317   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr);
318   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
319   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr);
320   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
321 
322   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
323 
324   if(mat_mkl_cpardiso->nrhs > 0){
325     ierr = MatDenseGetArray(B,&barray);
326     ierr = MatDenseGetArray(X,&xarray);
327 
328     /* solve phase */
329     /*-------------*/
330     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
331     cluster_sparse_solver (
332       mat_mkl_cpardiso->pt,
333       &mat_mkl_cpardiso->maxfct,
334       &mat_mkl_cpardiso->mnum,
335       &mat_mkl_cpardiso->mtype,
336       &mat_mkl_cpardiso->phase,
337       &mat_mkl_cpardiso->n,
338       mat_mkl_cpardiso->a,
339       mat_mkl_cpardiso->ia,
340       mat_mkl_cpardiso->ja,
341       mat_mkl_cpardiso->perm,
342       &mat_mkl_cpardiso->nrhs,
343       mat_mkl_cpardiso->iparm,
344       &mat_mkl_cpardiso->msglvl,
345       (void*)barray,
346       (void*)xarray,
347       &mat_mkl_cpardiso->comm_mkl_cpardiso,
348       &mat_mkl_cpardiso->err);
349     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
350   }
351   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
352   PetscFunctionReturn(0);
353 
354 }
355 
356 /*
357  * LU Decomposition
358  */
359 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
360 {
361   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
362   PetscErrorCode   ierr;
363 
364   PetscFunctionBegin;
365   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
366   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
367 
368   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
369   cluster_sparse_solver (
370     mat_mkl_cpardiso->pt,
371     &mat_mkl_cpardiso->maxfct,
372     &mat_mkl_cpardiso->mnum,
373     &mat_mkl_cpardiso->mtype,
374     &mat_mkl_cpardiso->phase,
375     &mat_mkl_cpardiso->n,
376     mat_mkl_cpardiso->a,
377     mat_mkl_cpardiso->ia,
378     mat_mkl_cpardiso->ja,
379     mat_mkl_cpardiso->perm,
380     &mat_mkl_cpardiso->nrhs,
381     mat_mkl_cpardiso->iparm,
382     &mat_mkl_cpardiso->msglvl,
383     NULL,
384     NULL,
385     &mat_mkl_cpardiso->comm_mkl_cpardiso,
386     &mat_mkl_cpardiso->err);
387   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
388 
389   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
390   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
391   PetscFunctionReturn(0);
392 }
393 
394 /* Sets mkl_cpardiso options from the options database */
395 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
396 {
397   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
398   PetscErrorCode      ierr;
399   PetscInt            icntl;
400   PetscBool           flg;
401   int                 pt[IPARM_SIZE], threads;
402 
403   PetscFunctionBegin;
404   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
405   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr);
406   if (flg) mkl_set_num_threads(threads);
407 
408   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr);
409   if (flg) mat_mkl_cpardiso->maxfct = icntl;
410 
411   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr);
412   if (flg) mat_mkl_cpardiso->mnum = icntl;
413 
414   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr);
415   if (flg) mat_mkl_cpardiso->msglvl = icntl;
416 
417   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr);
418   if(flg){
419     mat_mkl_cpardiso->mtype = icntl;
420 #if defined(PETSC_USE_REAL_SINGLE)
421     mat_mkl_cpardiso->iparm[27] = 1;
422 #else
423     mat_mkl_cpardiso->iparm[27] = 0;
424 #endif
425     mat_mkl_cpardiso->iparm[34] = 1;
426   }
427   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr);
428 
429   if(flg && icntl != 0){
430     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr);
431     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
432 
433     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr);
434     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
435 
436     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr);
437     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
438 
439     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr);
440     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
441 
442     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr);
443     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
444 
445     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr);
446     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
447 
448     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr);
449     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
450 
451     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr);
452     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
453 
454     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
455       &flg);CHKERRQ(ierr);
456     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
457 
458     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
459       &flg);CHKERRQ(ierr);
460     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
461 
462     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr);
463     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
464 
465     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr);
466     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
467 
468     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr);
469     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
470 
471     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr);
472     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
473 
474     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr);
475     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
476 
477     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr);
478     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
479 
480     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr);
481     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
482 
483     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr);
484     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
485   }
486 
487   ierr = PetscOptionsEnd();CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
492 {
493   PetscErrorCode  ierr;
494   PetscInt        i;
495   PetscMPIInt     size;
496 
497   PetscFunctionBegin;
498 
499   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
500   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
501 
502   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
503   mat_mkl_cpardiso->maxfct = 1;
504   mat_mkl_cpardiso->mnum = 1;
505   mat_mkl_cpardiso->n = A->rmap->N;
506   mat_mkl_cpardiso->msglvl = 0;
507   mat_mkl_cpardiso->nrhs = 1;
508   mat_mkl_cpardiso->err = 0;
509   mat_mkl_cpardiso->phase = -1;
510 #if defined(PETSC_USE_COMPLEX)
511   mat_mkl_cpardiso->mtype = 13;
512 #else
513   mat_mkl_cpardiso->mtype = 11;
514 #endif
515 
516 #if defined(PETSC_USE_REAL_SINGLE)
517   mat_mkl_cpardiso->iparm[27] = 1;
518 #else
519   mat_mkl_cpardiso->iparm[27] = 0;
520 #endif
521 
522   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
523 
524   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
525   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
526   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
527   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
528   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
529   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
530   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
531   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
532   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
533   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
534 
535   mat_mkl_cpardiso->iparm[39] = 0;
536   if (size > 1) {
537     mat_mkl_cpardiso->iparm[39] = 2;
538     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
539     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
540   }
541   mat_mkl_cpardiso->perm = 0;
542   PetscFunctionReturn(0);
543 }
544 
545 /*
546  * Symbolic decomposition. Mkl_Pardiso analysis phase.
547  */
548 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
549 {
550   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
551   PetscErrorCode  ierr;
552 
553   PetscFunctionBegin;
554   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
555 
556   /* Set MKL_CPARDISO options from the options database */
557   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
558   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
559 
560   mat_mkl_cpardiso->n = A->rmap->N;
561 
562   /* analysis phase */
563   /*----------------*/
564   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
565 
566   cluster_sparse_solver (
567     mat_mkl_cpardiso->pt,
568     &mat_mkl_cpardiso->maxfct,
569     &mat_mkl_cpardiso->mnum,
570     &mat_mkl_cpardiso->mtype,
571     &mat_mkl_cpardiso->phase,
572     &mat_mkl_cpardiso->n,
573     mat_mkl_cpardiso->a,
574     mat_mkl_cpardiso->ia,
575     mat_mkl_cpardiso->ja,
576     mat_mkl_cpardiso->perm,
577     &mat_mkl_cpardiso->nrhs,
578     mat_mkl_cpardiso->iparm,
579     &mat_mkl_cpardiso->msglvl,
580     NULL,
581     NULL,
582     &mat_mkl_cpardiso->comm_mkl_cpardiso,
583     &mat_mkl_cpardiso->err);
584 
585   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
586 
587   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
588   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
589   F->ops->solve           = MatSolve_MKL_CPARDISO;
590   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
591   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
592   PetscFunctionReturn(0);
593 }
594 
595 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
596 {
597   PetscErrorCode    ierr;
598   PetscBool         iascii;
599   PetscViewerFormat format;
600   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
601   PetscInt          i;
602 
603   PetscFunctionBegin;
604   /* check if matrix is mkl_cpardiso type */
605   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
606 
607   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
608   if (iascii) {
609     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
610     if (format == PETSC_VIEWER_ASCII_INFO) {
611       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
612       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
613       for(i = 1; i <= 64; i++){
614         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
615       }
616       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
617       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
618       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
619       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
620       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
621       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
622     }
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
628 {
629   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data;
630 
631   PetscFunctionBegin;
632   info->block_size        = 1.0;
633   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
634   info->nz_unneeded       = 0.0;
635   info->assemblies        = 0.0;
636   info->mallocs           = 0.0;
637   info->memory            = 0.0;
638   info->fill_ratio_given  = 0;
639   info->fill_ratio_needed = 0;
640   info->factor_mallocs    = 0;
641   PetscFunctionReturn(0);
642 }
643 
644 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
645 {
646   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data;
647 
648   PetscFunctionBegin;
649   if(icntl <= 64){
650     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
651   } else {
652     if(icntl == 65) mkl_set_num_threads((int)ival);
653     else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival;
654     else if(icntl == 67) mat_mkl_cpardiso->mnum = ival;
655     else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival;
656     else if(icntl == 69){
657       int pt[IPARM_SIZE];
658       mat_mkl_cpardiso->mtype = ival;
659 #if defined(PETSC_USE_REAL_SINGLE)
660       mat_mkl_cpardiso->iparm[27] = 1;
661 #else
662       mat_mkl_cpardiso->iparm[27] = 0;
663 #endif
664       mat_mkl_cpardiso->iparm[34] = 1;
665     }
666   }
667   PetscFunctionReturn(0);
668 }
669 
670 /*@
671   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
672 
673    Logically Collective on Mat
674 
675    Input Parameters:
676 +  F - the factored matrix obtained by calling MatGetFactor()
677 .  icntl - index of Mkl_Pardiso parameter
678 -  ival - value of Mkl_Pardiso parameter
679 
680   Options Database:
681 .   -mat_mkl_cpardiso_<icntl> <ival>
682 
683    Level: Intermediate
684 
685    Notes: This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options
686           database approach when working with TS, SNES, or KSP.
687 
688    References:
689 .      Mkl_Pardiso Users' Guide
690 
691 .seealso: MatGetFactor()
692 @*/
693 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
694 {
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
699   PetscFunctionReturn(0);
700 }
701 
702 static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type)
703 {
704   PetscFunctionBegin;
705   *type = MATSOLVERMKL_CPARDISO;
706   PetscFunctionReturn(0);
707 }
708 
709 /* MatGetFactor for MPI AIJ matrices */
710 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
711 {
712   Mat              B;
713   PetscErrorCode   ierr;
714   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
715   PetscBool        isSeqAIJ;
716 
717   PetscFunctionBegin;
718   /* Create the factorization matrix */
719 
720   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
721   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
722   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
723   ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr);
724   ierr = MatSetUp(B);CHKERRQ(ierr);
725 
726   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
727 
728   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
729   else          mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
730 
731   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
732   B->ops->destroy = MatDestroy_MKL_CPARDISO;
733 
734   B->ops->view    = MatView_MKL_CPARDISO;
735   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
736 
737   B->factortype   = ftype;
738   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
739 
740   B->data = mat_mkl_cpardiso;
741 
742   /* set solvertype */
743   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
744   ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr);
745 
746   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);CHKERRQ(ierr);
747   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
748   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
749 
750   *F = B;
751   PetscFunctionReturn(0);
752 }
753 
754 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void)
755 {
756   PetscErrorCode ierr;
757 
758   PetscFunctionBegin;
759   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
760   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
761   PetscFunctionReturn(0);
762 }
763