xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision 39720866a7a609e8eea00a1f6626febe75724041)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>                       /*I "petscmat.h" I*/
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
12 #define MKL_ILP64
13 #endif
14 #include <mkl.h>
15 #include <mkl_cluster_sparse_solver.h>
16 
17 /*
18  *  Possible mkl_cpardiso phases that controls the execution of the solver.
19  *  For more information check mkl_cpardiso manual.
20  */
21 #define JOB_ANALYSIS 11
22 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
23 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
24 #define JOB_NUMERICAL_FACTORIZATION 22
25 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
26 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
27 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
28 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
29 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
30 #define JOB_RELEASE_OF_LU_MEMORY 0
31 #define JOB_RELEASE_OF_ALL_MEMORY -1
32 
33 #define IPARM_SIZE 64
34 #define INT_TYPE MKL_INT
35 
36 static const char *Err_MSG_CPardiso(int errNo){
37   switch (errNo) {
38     case -1:
39       return "input inconsistent"; break;
40     case -2:
41       return "not enough memory"; break;
42     case -3:
43       return "reordering problem"; break;
44     case -4:
45       return "zero pivot, numerical factorization or iterative refinement problem"; break;
46     case -5:
47       return "unclassified (internal) error"; break;
48     case -6:
49       return "preordering failed (matrix types 11, 13 only)"; break;
50     case -7:
51       return "diagonal matrix problem"; break;
52     case -8:
53       return "32-bit integer overflow problem"; break;
54     case -9:
55       return "not enough memory for OOC"; break;
56     case -10:
57       return "problems with opening OOC temporary files"; break;
58     case -11:
59       return "read/write problems with the OOC data file"; break;
60     default :
61       return "unknown error";
62   }
63 }
64 
65 /*
66  *  Internal data structure.
67  *  For more information check mkl_cpardiso manual.
68  */
69 
70 typedef struct {
71 
72   /* Configuration vector */
73   INT_TYPE     iparm[IPARM_SIZE];
74 
75   /*
76    * Internal mkl_cpardiso memory location.
77    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
78    */
79   void         *pt[IPARM_SIZE];
80 
81   MPI_Comm     comm_mkl_cpardiso;
82 
83   /* Basic mkl_cpardiso info*/
84   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
85 
86   /* Matrix structure */
87   PetscScalar  *a;
88 
89   INT_TYPE     *ia, *ja;
90 
91   /* Number of non-zero elements */
92   INT_TYPE     nz;
93 
94   /* Row permutaton vector*/
95   INT_TYPE     *perm;
96 
97   /* Define is matrix preserve sparce structure. */
98   MatStructure matstruc;
99 
100   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
101 
102   /* True if mkl_cpardiso function have been used. */
103   PetscBool CleanUp;
104 } Mat_MKL_CPARDISO;
105 
106 /*
107  * Copy the elements of matrix A.
108  * Input:
109  *   - Mat A: MATSEQAIJ matrix
110  *   - int shift: matrix index.
111  *     - 0 for c representation
112  *     - 1 for fortran representation
113  *   - MatReuse reuse:
114  *     - MAT_INITIAL_MATRIX: Create a new aij representation
115  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
116  * Output:
117  *   - int *nnz: Number of nonzero-elements.
118  *   - int **r pointer to i index
119  *   - int **c pointer to j elements
120  *   - MATRIXTYPE **v: Non-zero elements
121  */
122 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
123 {
124   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
125 
126   PetscFunctionBegin;
127   *v=aa->a;
128   if (reuse == MAT_INITIAL_MATRIX) {
129     *r   = (INT_TYPE*)aa->i;
130     *c   = (INT_TYPE*)aa->j;
131     *nnz = aa->nz;
132   }
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
137 {
138   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
139   PetscErrorCode    ierr;
140   PetscInt          rstart,nz,i,j,countA,countB;
141   PetscInt          *row,*col;
142   const PetscScalar *av, *bv;
143   PetscScalar       *val;
144   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
145   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
146   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
147   PetscInt          colA_start,jB,jcol;
148 
149   PetscFunctionBegin;
150   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
151   av=aa->a; bv=bb->a;
152 
153   garray = mat->garray;
154 
155   if (reuse == MAT_INITIAL_MATRIX) {
156     nz   = aa->nz + bb->nz;
157     *nnz = nz;
158     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
159     col  = row + m + 1;
160     val  = (PetscScalar*)(col + nz);
161     *r = row; *c = col; *v = val;
162     row[0] = 0;
163   } else {
164     row = *r; col = *c; val = *v;
165   }
166 
167   nz = 0;
168   for (i=0; i<m; i++) {
169     row[i] = nz;
170     countA     = ai[i+1] - ai[i];
171     countB     = bi[i+1] - bi[i];
172     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
173     bjj        = bj + bi[i];
174 
175     /* B part, smaller col index */
176     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
177     jB         = 0;
178     for (j=0; j<countB; j++) {
179       jcol = garray[bjj[j]];
180       if (jcol > colA_start) {
181         jB = j;
182         break;
183       }
184       col[nz]   = jcol;
185       val[nz++] = *bv++;
186       if (j==countB-1) jB = countB;
187     }
188 
189     /* A part */
190     for (j=0; j<countA; j++) {
191       col[nz]   = rstart + ajj[j];
192       val[nz++] = *av++;
193     }
194 
195     /* B part, larger col index */
196     for (j=jB; j<countB; j++) {
197       col[nz]   = garray[bjj[j]];
198       val[nz++] = *bv++;
199     }
200   }
201   row[m] = nz;
202 
203   PetscFunctionReturn(0);
204 }
205 
206 /*
207  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
208  */
209 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
210 {
211   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
212   PetscErrorCode   ierr;
213 
214   PetscFunctionBegin;
215   /* Terminate instance, deallocate memories */
216   if (mat_mkl_cpardiso->CleanUp) {
217     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
218 
219     cluster_sparse_solver (
220       mat_mkl_cpardiso->pt,
221       &mat_mkl_cpardiso->maxfct,
222       &mat_mkl_cpardiso->mnum,
223       &mat_mkl_cpardiso->mtype,
224       &mat_mkl_cpardiso->phase,
225       &mat_mkl_cpardiso->n,
226       NULL,
227       NULL,
228       NULL,
229       mat_mkl_cpardiso->perm,
230       &mat_mkl_cpardiso->nrhs,
231       mat_mkl_cpardiso->iparm,
232       &mat_mkl_cpardiso->msglvl,
233       NULL,
234       NULL,
235       &mat_mkl_cpardiso->comm_mkl_cpardiso,
236       (int*)&mat_mkl_cpardiso->err);
237   }
238 
239   if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) {
240     ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr);
241   }
242   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
243   ierr = PetscFree(A->data);CHKERRQ(ierr);
244 
245   /* clear composed functions */
246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
247   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 /*
252  * Computes Ax = b
253  */
254 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
255 {
256   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
257   PetscErrorCode    ierr;
258   PetscScalar       *xarray;
259   const PetscScalar *barray;
260 
261   PetscFunctionBegin;
262   mat_mkl_cpardiso->nrhs = 1;
263   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
264   ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
265 
266   /* solve phase */
267   /*-------------*/
268   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
269   cluster_sparse_solver (
270     mat_mkl_cpardiso->pt,
271     &mat_mkl_cpardiso->maxfct,
272     &mat_mkl_cpardiso->mnum,
273     &mat_mkl_cpardiso->mtype,
274     &mat_mkl_cpardiso->phase,
275     &mat_mkl_cpardiso->n,
276     mat_mkl_cpardiso->a,
277     mat_mkl_cpardiso->ia,
278     mat_mkl_cpardiso->ja,
279     mat_mkl_cpardiso->perm,
280     &mat_mkl_cpardiso->nrhs,
281     mat_mkl_cpardiso->iparm,
282     &mat_mkl_cpardiso->msglvl,
283     (void*)barray,
284     (void*)xarray,
285     &mat_mkl_cpardiso->comm_mkl_cpardiso,
286     (int*)&mat_mkl_cpardiso->err);
287 
288   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
289 
290   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
291   ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
292   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
293   PetscFunctionReturn(0);
294 }
295 
296 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
297 {
298   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
299   PetscErrorCode   ierr;
300 
301   PetscFunctionBegin;
302 #if defined(PETSC_USE_COMPLEX)
303   mat_mkl_cpardiso->iparm[12 - 1] = 1;
304 #else
305   mat_mkl_cpardiso->iparm[12 - 1] = 2;
306 #endif
307   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
308   mat_mkl_cpardiso->iparm[12 - 1] = 0;
309   PetscFunctionReturn(0);
310 }
311 
312 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
313 {
314   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
315   PetscErrorCode    ierr;
316   PetscScalar       *xarray;
317   const PetscScalar *barray;
318   PetscBool         flg;
319 
320   PetscFunctionBegin;
321   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr);
322   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
323   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr);
324   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
325 
326   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
327 
328   if(mat_mkl_cpardiso->nrhs > 0){
329     ierr = MatDenseGetArrayRead(B,&barray);
330     ierr = MatDenseGetArray(X,&xarray);
331 
332     /* solve phase */
333     /*-------------*/
334     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
335     cluster_sparse_solver (
336       mat_mkl_cpardiso->pt,
337       &mat_mkl_cpardiso->maxfct,
338       &mat_mkl_cpardiso->mnum,
339       &mat_mkl_cpardiso->mtype,
340       &mat_mkl_cpardiso->phase,
341       &mat_mkl_cpardiso->n,
342       mat_mkl_cpardiso->a,
343       mat_mkl_cpardiso->ia,
344       mat_mkl_cpardiso->ja,
345       mat_mkl_cpardiso->perm,
346       &mat_mkl_cpardiso->nrhs,
347       mat_mkl_cpardiso->iparm,
348       &mat_mkl_cpardiso->msglvl,
349       (void*)barray,
350       (void*)xarray,
351       &mat_mkl_cpardiso->comm_mkl_cpardiso,
352       (int*)&mat_mkl_cpardiso->err);
353     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
354     ierr = MatDenseRestoreArrayRead(B,&barray);
355     ierr = MatDenseRestoreArray(X,&xarray);
356 
357   }
358   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
359   PetscFunctionReturn(0);
360 
361 }
362 
363 /*
364  * LU Decomposition
365  */
366 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
367 {
368   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
369   PetscErrorCode   ierr;
370 
371   PetscFunctionBegin;
372   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
373   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
374 
375   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
376   cluster_sparse_solver (
377     mat_mkl_cpardiso->pt,
378     &mat_mkl_cpardiso->maxfct,
379     &mat_mkl_cpardiso->mnum,
380     &mat_mkl_cpardiso->mtype,
381     &mat_mkl_cpardiso->phase,
382     &mat_mkl_cpardiso->n,
383     mat_mkl_cpardiso->a,
384     mat_mkl_cpardiso->ia,
385     mat_mkl_cpardiso->ja,
386     mat_mkl_cpardiso->perm,
387     &mat_mkl_cpardiso->nrhs,
388     mat_mkl_cpardiso->iparm,
389     &mat_mkl_cpardiso->msglvl,
390     NULL,
391     NULL,
392     &mat_mkl_cpardiso->comm_mkl_cpardiso,
393     &mat_mkl_cpardiso->err);
394   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
395 
396   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
397   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
398   PetscFunctionReturn(0);
399 }
400 
401 /* Sets mkl_cpardiso options from the options database */
402 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
403 {
404   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
405   PetscErrorCode      ierr;
406   PetscInt            icntl;
407   PetscBool           flg;
408   int                 threads;
409 
410   PetscFunctionBegin;
411   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
412   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr);
413   if (flg) mkl_set_num_threads(threads);
414 
415   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr);
416   if (flg) mat_mkl_cpardiso->maxfct = icntl;
417 
418   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr);
419   if (flg) mat_mkl_cpardiso->mnum = icntl;
420 
421   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr);
422   if (flg) mat_mkl_cpardiso->msglvl = icntl;
423 
424   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr);
425   if(flg){
426     mat_mkl_cpardiso->mtype = icntl;
427 #if defined(PETSC_USE_REAL_SINGLE)
428     mat_mkl_cpardiso->iparm[27] = 1;
429 #else
430     mat_mkl_cpardiso->iparm[27] = 0;
431 #endif
432     mat_mkl_cpardiso->iparm[34] = 1;
433   }
434   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr);
435 
436   if(flg && icntl != 0){
437     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr);
438     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
439 
440     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr);
441     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
442 
443     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr);
444     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
445 
446     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr);
447     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
448 
449     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr);
450     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
451 
452     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr);
453     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
454 
455     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr);
456     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
457 
458     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr);
459     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
460 
461     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
462       &flg);CHKERRQ(ierr);
463     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
464 
465     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
466       &flg);CHKERRQ(ierr);
467     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
468 
469     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr);
470     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
471 
472     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr);
473     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
474 
475     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr);
476     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
477 
478     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr);
479     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
480 
481     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr);
482     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
483 
484     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr);
485     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
486 
487     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr);
488     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
489 
490     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr);
491     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
492   }
493 
494   ierr = PetscOptionsEnd();CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
499 {
500   PetscErrorCode  ierr;
501   PetscMPIInt     size;
502 
503   PetscFunctionBegin;
504 
505   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
506   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr);
507 
508   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
509   mat_mkl_cpardiso->maxfct = 1;
510   mat_mkl_cpardiso->mnum = 1;
511   mat_mkl_cpardiso->n = A->rmap->N;
512   mat_mkl_cpardiso->msglvl = 0;
513   mat_mkl_cpardiso->nrhs = 1;
514   mat_mkl_cpardiso->err = 0;
515   mat_mkl_cpardiso->phase = -1;
516 #if defined(PETSC_USE_COMPLEX)
517   mat_mkl_cpardiso->mtype = 13;
518 #else
519   mat_mkl_cpardiso->mtype = 11;
520 #endif
521 
522 #if defined(PETSC_USE_REAL_SINGLE)
523   mat_mkl_cpardiso->iparm[27] = 1;
524 #else
525   mat_mkl_cpardiso->iparm[27] = 0;
526 #endif
527 
528   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
529 
530   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
531   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
532   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
533   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
534   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
535   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
536   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
537   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
538   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
539   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
540 
541   mat_mkl_cpardiso->iparm[39] = 0;
542   if (size > 1) {
543     mat_mkl_cpardiso->iparm[39] = 2;
544     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
545     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
546   }
547   mat_mkl_cpardiso->perm = 0;
548   PetscFunctionReturn(0);
549 }
550 
551 /*
552  * Symbolic decomposition. Mkl_Pardiso analysis phase.
553  */
554 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
555 {
556   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
557   PetscErrorCode  ierr;
558 
559   PetscFunctionBegin;
560   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
561 
562   /* Set MKL_CPARDISO options from the options database */
563   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
564   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr);
565 
566   mat_mkl_cpardiso->n = A->rmap->N;
567 
568   /* analysis phase */
569   /*----------------*/
570   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
571 
572   cluster_sparse_solver (
573     mat_mkl_cpardiso->pt,
574     &mat_mkl_cpardiso->maxfct,
575     &mat_mkl_cpardiso->mnum,
576     &mat_mkl_cpardiso->mtype,
577     &mat_mkl_cpardiso->phase,
578     &mat_mkl_cpardiso->n,
579     mat_mkl_cpardiso->a,
580     mat_mkl_cpardiso->ia,
581     mat_mkl_cpardiso->ja,
582     mat_mkl_cpardiso->perm,
583     &mat_mkl_cpardiso->nrhs,
584     mat_mkl_cpardiso->iparm,
585     &mat_mkl_cpardiso->msglvl,
586     NULL,
587     NULL,
588     &mat_mkl_cpardiso->comm_mkl_cpardiso,
589     (int*)&mat_mkl_cpardiso->err);
590 
591   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
592 
593   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
594   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
595   F->ops->solve           = MatSolve_MKL_CPARDISO;
596   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
597   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
598   PetscFunctionReturn(0);
599 }
600 
601 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
602 {
603   PetscErrorCode    ierr;
604   PetscBool         iascii;
605   PetscViewerFormat format;
606   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
607   PetscInt          i;
608 
609   PetscFunctionBegin;
610   /* check if matrix is mkl_cpardiso type */
611   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
612 
613   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
614   if (iascii) {
615     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
616     if (format == PETSC_VIEWER_ASCII_INFO) {
617       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
618       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
619       for(i = 1; i <= 64; i++){
620         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
621       }
622       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
623       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
624       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
625       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
626       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
627       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
628     }
629   }
630   PetscFunctionReturn(0);
631 }
632 
633 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
634 {
635   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data;
636 
637   PetscFunctionBegin;
638   info->block_size        = 1.0;
639   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
640   info->nz_unneeded       = 0.0;
641   info->assemblies        = 0.0;
642   info->mallocs           = 0.0;
643   info->memory            = 0.0;
644   info->fill_ratio_given  = 0;
645   info->fill_ratio_needed = 0;
646   info->factor_mallocs    = 0;
647   PetscFunctionReturn(0);
648 }
649 
650 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
651 {
652   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data;
653 
654   PetscFunctionBegin;
655   if(icntl <= 64){
656     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
657   } else {
658     if(icntl == 65) mkl_set_num_threads((int)ival);
659     else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival;
660     else if(icntl == 67) mat_mkl_cpardiso->mnum = ival;
661     else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival;
662     else if(icntl == 69){
663       mat_mkl_cpardiso->mtype = ival;
664 #if defined(PETSC_USE_REAL_SINGLE)
665       mat_mkl_cpardiso->iparm[27] = 1;
666 #else
667       mat_mkl_cpardiso->iparm[27] = 0;
668 #endif
669       mat_mkl_cpardiso->iparm[34] = 1;
670     }
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 /*@
676   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
677 
678    Logically Collective on Mat
679 
680    Input Parameters:
681 +  F - the factored matrix obtained by calling MatGetFactor()
682 .  icntl - index of Mkl_Pardiso parameter
683 -  ival - value of Mkl_Pardiso parameter
684 
685   Options Database:
686 .   -mat_mkl_cpardiso_<icntl> <ival>
687 
688    Level: Intermediate
689 
690    Notes:
691     This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options
692           database approach when working with TS, SNES, or KSP.
693 
694    References:
695 .      Mkl_Pardiso Users' Guide
696 
697 .seealso: MatGetFactor()
698 @*/
699 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
700 {
701   PetscErrorCode ierr;
702 
703   PetscFunctionBegin;
704   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type)
709 {
710   PetscFunctionBegin;
711   *type = MATSOLVERMKL_CPARDISO;
712   PetscFunctionReturn(0);
713 }
714 
715 /* MatGetFactor for MPI AIJ matrices */
716 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
717 {
718   Mat              B;
719   PetscErrorCode   ierr;
720   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
721   PetscBool        isSeqAIJ;
722 
723   PetscFunctionBegin;
724   /* Create the factorization matrix */
725 
726   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
727   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
728   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
729   ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr);
730   ierr = MatSetUp(B);CHKERRQ(ierr);
731 
732   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
733 
734   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
735   else          mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
736 
737   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
738   B->ops->destroy = MatDestroy_MKL_CPARDISO;
739 
740   B->ops->view    = MatView_MKL_CPARDISO;
741   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
742 
743   B->factortype   = ftype;
744   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
745 
746   B->data = mat_mkl_cpardiso;
747 
748   /* set solvertype */
749   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
750   ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr);
751 
752   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);CHKERRQ(ierr);
753   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
754   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
755 
756   *F = B;
757   PetscFunctionReturn(0);
758 }
759 
760 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void)
761 {
762   PetscErrorCode ierr;
763 
764   PetscFunctionBegin;
765   ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
766   ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
767   PetscFunctionReturn(0);
768 }
769