xref: /petsc/src/mat/impls/aij/mpi/mkl_cpardiso/mkl_cpardiso.c (revision fb7da6fd3ceaf4e9fef121c07f450d58f8a79a54)
1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2 #define MKL_ILP64
3 #endif
4 
5 #include <../src/mat/impls/aij/seq/aij.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #include <mkl.h>
12 #include <mkl_cluster_sparse_solver.h>
13 
14 /*
15  *  Possible mkl_cpardiso phases that controls the execution of the solver.
16  *  For more information check mkl_cpardiso manual.
17  */
18 #define JOB_ANALYSIS 11
19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
21 #define JOB_NUMERICAL_FACTORIZATION 22
22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
27 #define JOB_RELEASE_OF_LU_MEMORY 0
28 #define JOB_RELEASE_OF_ALL_MEMORY -1
29 
30 #define IPARM_SIZE 64
31 #define INT_TYPE MKL_INT
32 
33 static const char *Err_MSG_CPardiso(int errNo){
34   switch (errNo) {
35     case -1:
36       return "input inconsistent"; break;
37     case -2:
38       return "not enough memory"; break;
39     case -3:
40       return "reordering problem"; break;
41     case -4:
42       return "zero pivot, numerical factorization or iterative refinement problem"; break;
43     case -5:
44       return "unclassified (internal) error"; break;
45     case -6:
46       return "preordering failed (matrix types 11, 13 only)"; break;
47     case -7:
48       return "diagonal matrix problem"; break;
49     case -8:
50       return "32-bit integer overflow problem"; break;
51     case -9:
52       return "not enough memory for OOC"; break;
53     case -10:
54       return "problems with opening OOC temporary files"; break;
55     case -11:
56       return "read/write problems with the OOC data file"; break;
57     default :
58       return "unknown error";
59   }
60 }
61 
62 /*
63  *  Internal data structure.
64  *  For more information check mkl_cpardiso manual.
65  */
66 
67 typedef struct {
68 
69   /*Configuration vector*/
70   INT_TYPE     iparm[IPARM_SIZE];
71 
72   /*
73    * Internal mkl_cpardiso memory location.
74    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
75    */
76   void         *pt[IPARM_SIZE];
77 
78   MPI_Comm     comm_mkl_cpardiso;
79 
80   /*Basic mkl_cpardiso info*/
81   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
82 
83   /*Matrix structure*/
84   PetscScalar  *a;
85 
86   INT_TYPE     *ia, *ja;
87 
88   /*Number of non-zero elements*/
89   INT_TYPE     nz;
90 
91   /*Row permutaton vector*/
92   INT_TYPE     *perm;
93 
94   /*Deffine is matrix preserve sparce structure.*/
95   MatStructure matstruc;
96 
97   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
98 
99   /*True if mkl_cpardiso function have been used.*/
100   PetscBool CleanUp;
101 } Mat_MKL_CPARDISO;
102 
103 /*
104  * Copy the elements of matrix A.
105  * Input:
106  *   - Mat A: MATSEQAIJ matrix
107  *   - int shift: matrix index.
108  *     - 0 for c representation
109  *     - 1 for fortran representation
110  *   - MatReuse reuse:
111  *     - MAT_INITIAL_MATRIX: Create a new aij representation
112  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
113  * Output:
114  *   - int *nnz: Number of nonzero-elements.
115  *   - int **r pointer to i index
116  *   - int **c pointer to j elements
117  *   - MATRIXTYPE **v: Non-zero elements
118  */
119 #undef __FUNCT__
120 #define __FUNCT__ "MatCopy_seqaij_seqaij_MKL_CPARDISO"
121 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v){
122   Mat_SeqAIJ     *aa=(Mat_SeqAIJ*)A->data;
123   PetscFunctionBegin;
124   *v=aa->a;
125   if (reuse == MAT_INITIAL_MATRIX) {
126     *r   = (INT_TYPE*)aa->i;
127     *c   = (INT_TYPE*)aa->j;
128     *nnz = aa->nz;
129   }
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO"
135 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
136 {
137   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
138   PetscErrorCode    ierr;
139   PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
140   PetscInt          *row,*col;
141   const PetscScalar *av, *bv,*v1,*v2;
142   PetscScalar       *val;
143   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
144   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
145   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
146   PetscInt nn, colA_start,jB,jcol;
147 
148   PetscFunctionBegin;
149   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
150   av=aa->a; bv=bb->a;
151 
152   garray = mat->garray;
153 
154   if (reuse == MAT_INITIAL_MATRIX) {
155     nz   = aa->nz + bb->nz;
156     *nnz = nz;
157     ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr);
158     col  = row + m + 1;
159     val  = (PetscScalar*)(col + nz);
160     *r = row; *c = col; *v = val;
161     row[0] = 0;
162   } else {
163     row = *r; col = *c; val = *v;
164   }
165 
166   nz = 0;
167   for (i=0; i<m; i++) {
168     row[i] = nz;
169     countA     = ai[i+1] - ai[i];
170     countB     = bi[i+1] - bi[i];
171     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
172     bjj        = bj + bi[i];
173 
174     /* B part, smaller col index */
175     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
176     jB         = 0;
177     for (j=0; j<countB; j++) {
178       jcol = garray[bjj[j]];
179       if (jcol > colA_start) {
180         jB = j;
181         break;
182       }
183       col[nz]   = jcol;
184       val[nz++] = *bv++;
185       if (j==countB-1) jB = countB;
186     }
187 
188     /* A part */
189     for (j=0; j<countA; j++) {
190       col[nz]   = rstart + ajj[j];
191       val[nz++] = *av++;
192     }
193 
194     /* B part, larger col index */
195     for (j=jB; j<countB; j++) {
196       col[nz]   = garray[bjj[j]];
197       val[nz++] = *bv++;
198     }
199   }
200   row[m] = nz;
201 
202   PetscFunctionReturn(0);
203 }
204 
205 /*
206  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
207  */
208 #undef __FUNCT__
209 #define __FUNCT__ "MatDestroy_MKL_CPARDISO"
210 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A){
211   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
212   PetscErrorCode ierr;
213 
214   PetscFunctionBegin;
215   /* Terminate instance, deallocate memories */
216   if (mat_mkl_cpardiso->CleanUp) {
217     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
218 
219     cluster_sparse_solver (
220       mat_mkl_cpardiso->pt,
221       &mat_mkl_cpardiso->maxfct,
222       &mat_mkl_cpardiso->mnum,
223       &mat_mkl_cpardiso->mtype,
224       &mat_mkl_cpardiso->phase,
225       &mat_mkl_cpardiso->n,
226       NULL,
227       NULL,
228       NULL,
229       mat_mkl_cpardiso->perm,
230       &mat_mkl_cpardiso->nrhs,
231       mat_mkl_cpardiso->iparm,
232       &mat_mkl_cpardiso->msglvl,
233       NULL,
234       NULL,
235       &mat_mkl_cpardiso->comm_mkl_cpardiso,
236       &mat_mkl_cpardiso->err);
237   }
238   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
239   ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
240 
241   /* clear composed functions */
242   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 /*
248  * Computes Ax = b
249  */
250 #undef __FUNCT__
251 #define __FUNCT__ "MatSolve_MKL_CPARDISO"
252 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x){
253   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr;
254   PetscErrorCode    ierr;
255   PetscScalar       *barray, *xarray;
256 
257   PetscFunctionBegin;
258 
259   mat_mkl_cpardiso->nrhs = 1;
260   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
261   ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
262 
263   /* solve phase */
264   /*-------------*/
265   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
266   cluster_sparse_solver (
267     mat_mkl_cpardiso->pt,
268     &mat_mkl_cpardiso->maxfct,
269     &mat_mkl_cpardiso->mnum,
270     &mat_mkl_cpardiso->mtype,
271     &mat_mkl_cpardiso->phase,
272     &mat_mkl_cpardiso->n,
273     mat_mkl_cpardiso->a,
274     mat_mkl_cpardiso->ia,
275     mat_mkl_cpardiso->ja,
276     mat_mkl_cpardiso->perm,
277     &mat_mkl_cpardiso->nrhs,
278     mat_mkl_cpardiso->iparm,
279     &mat_mkl_cpardiso->msglvl,
280     (void*)barray,
281     (void*)xarray,
282     &mat_mkl_cpardiso->comm_mkl_cpardiso,
283     &mat_mkl_cpardiso->err);
284 
285   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
286 
287   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
288   PetscFunctionReturn(0);
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatSolveTranspose_MKL_CPARDISO"
293 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x){
294   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
295   PetscErrorCode ierr;
296 
297   PetscFunctionBegin;
298 #if defined(PETSC_USE_COMPLEX)
299   mat_mkl_cpardiso->iparm[12 - 1] = 1;
300 #else
301   mat_mkl_cpardiso->iparm[12 - 1] = 2;
302 #endif
303   ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr);
304   mat_mkl_cpardiso->iparm[12 - 1] = 0;
305   PetscFunctionReturn(0);
306 }
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "MatMatSolve_MKL_CPARDISO"
310 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X){
311   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr;
312   PetscErrorCode    ierr;
313   PetscScalar       *barray, *xarray;
314   PetscBool      flg;
315 
316   PetscFunctionBegin;
317 
318   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr);
319   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
320   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr);
321   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
322 
323   ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
324 
325   if(mat_mkl_cpardiso->nrhs > 0){
326     ierr = MatDenseGetArray(B,&barray);
327     ierr = MatDenseGetArray(X,&xarray);
328 
329     /* solve phase */
330     /*-------------*/
331     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
332     cluster_sparse_solver (
333       mat_mkl_cpardiso->pt,
334       &mat_mkl_cpardiso->maxfct,
335       &mat_mkl_cpardiso->mnum,
336       &mat_mkl_cpardiso->mtype,
337       &mat_mkl_cpardiso->phase,
338       &mat_mkl_cpardiso->n,
339       mat_mkl_cpardiso->a,
340       mat_mkl_cpardiso->ia,
341       mat_mkl_cpardiso->ja,
342       mat_mkl_cpardiso->perm,
343       &mat_mkl_cpardiso->nrhs,
344       mat_mkl_cpardiso->iparm,
345       &mat_mkl_cpardiso->msglvl,
346       (void*)barray,
347       (void*)xarray,
348       &mat_mkl_cpardiso->comm_mkl_cpardiso,
349       &mat_mkl_cpardiso->err);
350     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
351   }
352   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
353   PetscFunctionReturn(0);
354 
355 }
356 
357 /*
358  * LU Decomposition
359  */
360 #undef __FUNCT__
361 #define __FUNCT__ "MatFactorNumeric_MKL_CPARDISO"
362 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info){
363   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->spptr;
364   PetscErrorCode ierr;
365 
366   /* numerical factorization phase */
367   /*-------------------------------*/
368 
369   PetscFunctionBegin;
370 
371   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
372   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a);CHKERRQ(ierr);
373 
374   /* numerical factorization phase */
375   /*-------------------------------*/
376   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
377   cluster_sparse_solver (
378     mat_mkl_cpardiso->pt,
379     &mat_mkl_cpardiso->maxfct,
380     &mat_mkl_cpardiso->mnum,
381     &mat_mkl_cpardiso->mtype,
382     &mat_mkl_cpardiso->phase,
383     &mat_mkl_cpardiso->n,
384     mat_mkl_cpardiso->a,
385     mat_mkl_cpardiso->ia,
386     mat_mkl_cpardiso->ja,
387     mat_mkl_cpardiso->perm,
388     &mat_mkl_cpardiso->nrhs,
389     mat_mkl_cpardiso->iparm,
390     &mat_mkl_cpardiso->msglvl,
391     NULL,
392     NULL,
393     &mat_mkl_cpardiso->comm_mkl_cpardiso,
394     &mat_mkl_cpardiso->err);
395   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
396 
397   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
398   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
399   PetscFunctionReturn(0);
400 }
401 
402 /* Sets mkl_cpardiso options from the options database */
403 #undef __FUNCT__
404 #define __FUNCT__ "PetscSetMKL_CPARDISOFromOptions"
405 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A){
406   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr;
407   PetscErrorCode      ierr;
408   PetscInt            icntl;
409   PetscBool           flg;
410   int                 pt[IPARM_SIZE], threads;
411 
412   PetscFunctionBegin;
413   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr);
414   ierr = PetscOptionsInt("-mat_mkl_cpardiso_65",
415     "Number of thrads to use",
416     "None",
417     threads,
418     &threads,
419     &flg);CHKERRQ(ierr);
420   if (flg) mkl_set_num_threads(threads);
421 
422   ierr = PetscOptionsInt("-mat_mkl_cpardiso_66",
423     "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time",
424     "None",
425      mat_mkl_cpardiso->maxfct,
426     &icntl,
427     &flg);CHKERRQ(ierr);
428   if (flg) mat_mkl_cpardiso->maxfct = icntl;
429 
430   ierr = PetscOptionsInt("-mat_mkl_cpardiso_67",
431     "Indicates the actual matrix for the solution phase",
432     "None",
433     mat_mkl_cpardiso->mnum,
434     &icntl,
435     &flg);CHKERRQ(ierr);
436   if (flg) mat_mkl_cpardiso->mnum = icntl;
437 
438   ierr = PetscOptionsInt("-mat_mkl_cpardiso_68",
439     "Message level information",
440     "None",
441     mat_mkl_cpardiso->msglvl,
442     &icntl,
443     &flg);CHKERRQ(ierr);
444   if (flg) mat_mkl_cpardiso->msglvl = icntl;
445 
446   ierr = PetscOptionsInt("-mat_mkl_cpardiso_69",
447     "Defines the matrix type",
448     "None",
449     mat_mkl_cpardiso->mtype,
450     &icntl,
451     &flg);CHKERRQ(ierr);
452   if(flg){
453     mat_mkl_cpardiso->mtype = icntl;
454 #if defined(PETSC_USE_REAL_SINGLE)
455     mat_mkl_cpardiso->iparm[27] = 1;
456 #else
457     mat_mkl_cpardiso->iparm[27] = 0;
458 #endif
459     mat_mkl_cpardiso->iparm[34] = 1;
460   }
461   ierr = PetscOptionsInt("-mat_mkl_cpardiso_1",
462     "Use default values",
463     "None",
464     mat_mkl_cpardiso->iparm[0],
465     &icntl,
466     &flg);CHKERRQ(ierr);
467 
468   if(flg && icntl != 0){
469     ierr = PetscOptionsInt("-mat_mkl_cpardiso_2",
470       "Fill-in reducing ordering for the input matrix",
471       "None",
472       mat_mkl_cpardiso->iparm[1],
473       &icntl,
474       &flg);CHKERRQ(ierr);
475     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
476 
477     ierr = PetscOptionsInt("-mat_mkl_cpardiso_4",
478       "Preconditioned CGS/CG",
479       "None",
480       mat_mkl_cpardiso->iparm[3],
481       &icntl,
482       &flg);CHKERRQ(ierr);
483     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
484 
485     ierr = PetscOptionsInt("-mat_mkl_cpardiso_5",
486       "User permutation",
487       "None",
488       mat_mkl_cpardiso->iparm[4],
489       &icntl,
490       &flg);CHKERRQ(ierr);
491     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
492 
493     ierr = PetscOptionsInt("-mat_mkl_cpardiso_6",
494       "Write solution on x",
495       "None",
496       mat_mkl_cpardiso->iparm[5],
497       &icntl,
498       &flg);CHKERRQ(ierr);
499     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
500 
501     ierr = PetscOptionsInt("-mat_mkl_cpardiso_8",
502       "Iterative refinement step",
503       "None",
504       mat_mkl_cpardiso->iparm[7],
505       &icntl,
506       &flg);CHKERRQ(ierr);
507     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
508 
509     ierr = PetscOptionsInt("-mat_mkl_cpardiso_10",
510       "Pivoting perturbation",
511       "None",
512       mat_mkl_cpardiso->iparm[9],
513       &icntl,
514       &flg);CHKERRQ(ierr);
515     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
516 
517     ierr = PetscOptionsInt("-mat_mkl_cpardiso_11",
518       "Scaling vectors",
519       "None",
520       mat_mkl_cpardiso->iparm[10],
521       &icntl,
522       &flg);CHKERRQ(ierr);
523     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
524 
525     ierr = PetscOptionsInt("-mat_mkl_cpardiso_12",
526       "Solve with transposed or conjugate transposed matrix A",
527       "None",
528       mat_mkl_cpardiso->iparm[11],
529       &icntl,
530       &flg);CHKERRQ(ierr);
531     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
532 
533     ierr = PetscOptionsInt("-mat_mkl_cpardiso_13",
534       "Improved accuracy using (non-) symmetric weighted matching",
535       "None",
536       mat_mkl_cpardiso->iparm[12],
537       &icntl,
538       &flg);CHKERRQ(ierr);
539     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
540 
541     ierr = PetscOptionsInt("-mat_mkl_cpardiso_18",
542       "Numbers of non-zero elements",
543       "None",
544       mat_mkl_cpardiso->iparm[17],
545       &icntl,
546       &flg);CHKERRQ(ierr);
547     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
548 
549     ierr = PetscOptionsInt("-mat_mkl_cpardiso_19",
550       "Report number of floating point operations",
551       "None",
552       mat_mkl_cpardiso->iparm[18],
553       &icntl,
554       &flg);CHKERRQ(ierr);
555     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
556 
557     ierr = PetscOptionsInt("-mat_mkl_cpardiso_21",
558       "Pivoting for symmetric indefinite matrices",
559       "None",
560       mat_mkl_cpardiso->iparm[20],
561       &icntl,
562       &flg);CHKERRQ(ierr);
563     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
564 
565     ierr = PetscOptionsInt("-mat_mkl_cpardiso_24",
566       "Parallel factorization control",
567       "None",
568       mat_mkl_cpardiso->iparm[23],
569       &icntl,
570       &flg);CHKERRQ(ierr);
571     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
572 
573     ierr = PetscOptionsInt("-mat_mkl_cpardiso_25",
574       "Parallel forward/backward solve control",
575       "None",
576       mat_mkl_cpardiso->iparm[24],
577       &icntl,
578       &flg);CHKERRQ(ierr);
579     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
580 
581     ierr = PetscOptionsInt("-mat_mkl_cpardiso_27",
582       "Matrix checker",
583       "None",
584       mat_mkl_cpardiso->iparm[26],
585       &icntl,
586       &flg);CHKERRQ(ierr);
587     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
588 
589     ierr = PetscOptionsInt("-mat_mkl_cpardiso_31",
590       "Partial solve and computing selected components of the solution vectors",
591       "None",
592       mat_mkl_cpardiso->iparm[30],
593       &icntl,
594       &flg);CHKERRQ(ierr);
595     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
596 
597     ierr = PetscOptionsInt("-mat_mkl_cpardiso_34",
598       "Optimal number of threads for conditional numerical reproducibility (CNR) mode",
599       "None",
600       mat_mkl_cpardiso->iparm[33],
601       &icntl,
602       &flg);CHKERRQ(ierr);
603     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
604 
605     ierr = PetscOptionsInt("-mat_mkl_cpardiso_60",
606       "Intel MKL_CPARDISO mode",
607       "None",
608       mat_mkl_cpardiso->iparm[59],
609       &icntl,
610       &flg);CHKERRQ(ierr);
611     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
612   }
613 
614   PetscOptionsEnd();
615   PetscFunctionReturn(0);
616 }
617 
618 #undef __FUNCT__
619 #define __FUNCT__ "PetscInitialize_MKL_CPARDISO"
620 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso){
621   PetscErrorCode  ierr;
622   PetscInt        i;
623   int SizeComm = -1;
624 
625   PetscFunctionBegin;
626 
627   ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr);
628   ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &SizeComm);CHKERRQ(ierr);
629 
630   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
631   mat_mkl_cpardiso->maxfct = 1;
632   mat_mkl_cpardiso->mnum = 1;
633   mat_mkl_cpardiso->n = A->rmap->N;
634   mat_mkl_cpardiso->msglvl = 0;
635   mat_mkl_cpardiso->nrhs = 1;
636   mat_mkl_cpardiso->err = 0;
637   mat_mkl_cpardiso->phase = -1;
638 #if defined(PETSC_USE_COMPLEX)
639   mat_mkl_cpardiso->mtype = 13;
640 #else
641   mat_mkl_cpardiso->mtype = 11;
642 #endif
643 
644 #if defined(PETSC_USE_REAL_SINGLE)
645   mat_mkl_cpardiso->iparm[27] = 1;
646 #else
647   mat_mkl_cpardiso->iparm[27] = 0;
648 #endif
649 
650   mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
651 
652   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
653   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
654   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
655   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
656   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
657   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
658   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
659   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
660   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
661   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */
662 
663   mat_mkl_cpardiso->iparm[39] = 0;
664   if (SizeComm > 1) {
665     mat_mkl_cpardiso->iparm[39] = 2;
666     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
667     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
668   }
669 
670   mat_mkl_cpardiso->perm = 0;
671 
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676  * Symbolic decomposition. Mkl_Pardiso analysis phase.
677  */
678 #undef __FUNCT__
679 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_CPARDISO"
680 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info){
681 
682   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr;
683   PetscErrorCode ierr;
684 
685   PetscFunctionBegin;
686 
687   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
688 
689   /* Set MKL_CPARDISO options from the options database */
690   ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr);
691 
692   ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_INITIAL_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a);CHKERRQ(ierr);
693 
694   mat_mkl_cpardiso->n = A->rmap->N;
695 
696   /* analysis phase */
697   /*----------------*/
698   mat_mkl_cpardiso->phase = JOB_ANALYSIS;
699 
700   cluster_sparse_solver (
701     mat_mkl_cpardiso->pt,
702     &mat_mkl_cpardiso->maxfct,
703     &mat_mkl_cpardiso->mnum,
704     &mat_mkl_cpardiso->mtype,
705     &mat_mkl_cpardiso->phase,
706     &mat_mkl_cpardiso->n,
707     mat_mkl_cpardiso->a,
708     mat_mkl_cpardiso->ia,
709     mat_mkl_cpardiso->ja,
710     mat_mkl_cpardiso->perm,
711     &mat_mkl_cpardiso->nrhs,
712     mat_mkl_cpardiso->iparm,
713     &mat_mkl_cpardiso->msglvl,
714     NULL,
715     NULL,
716     &mat_mkl_cpardiso->comm_mkl_cpardiso,
717     &mat_mkl_cpardiso->err);
718 
719   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
720 
721   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
722   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
723   F->ops->solve           = MatSolve_MKL_CPARDISO;
724   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
725   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatView_MKL_CPARDISO"
731 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer){
732   PetscErrorCode    ierr;
733   PetscBool         iascii;
734   PetscViewerFormat format;
735   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
736   PetscInt          i;
737 
738   PetscFunctionBegin;
739   /* check if matrix is mkl_cpardiso type */
740   if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0);
741 
742   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
743   if (iascii) {
744     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
745     if (format == PETSC_VIEWER_ASCII_INFO) {
746       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr);
747       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr);
748       for(i = 1; i <= 64; i++){
749         ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr);
750       }
751       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr);
752       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr);
753       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr);
754       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr);
755       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr);
756       ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr);
757     }
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatGetInfo_MKL_CPARDISO"
764 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info){
765   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->spptr;
766 
767   PetscFunctionBegin;
768   info->block_size        = 1.0;
769   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
770   info->nz_unneeded       = 0.0;
771   info->assemblies        = 0.0;
772   info->mallocs           = 0.0;
773   info->memory            = 0.0;
774   info->fill_ratio_given  = 0;
775   info->fill_ratio_needed = 0;
776   info->factor_mallocs    = 0;
777   PetscFunctionReturn(0);
778 }
779 
780 #undef __FUNCT__
781 #define __FUNCT__ "MatMkl_CPardisoSetCntl_MKL_CPARDISO"
782 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival){
783   Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->spptr;
784   PetscFunctionBegin;
785   if(icntl <= 64){
786     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
787   } else {
788     if(icntl == 65)
789       mkl_set_num_threads((int)ival);
790     else if(icntl == 66)
791       mat_mkl_cpardiso->maxfct = ival;
792     else if(icntl == 67)
793       mat_mkl_cpardiso->mnum = ival;
794     else if(icntl == 68)
795       mat_mkl_cpardiso->msglvl = ival;
796     else if(icntl == 69){
797       int pt[IPARM_SIZE];
798       mat_mkl_cpardiso->mtype = ival;
799 #if defined(PETSC_USE_REAL_SINGLE)
800       mat_mkl_cpardiso->iparm[27] = 1;
801 #else
802       mat_mkl_cpardiso->iparm[27] = 0;
803 #endif
804       mat_mkl_cpardiso->iparm[34] = 1;
805     }
806   }
807   PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatMkl_CPardisoSetCntl"
812 /*@
813   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
814 
815    Logically Collective on Mat
816 
817    Input Parameters:
818 +  F - the factored matrix obtained by calling MatGetFactor()
819 .  icntl - index of Mkl_Pardiso parameter
820 -  ival - value of Mkl_Pardiso parameter
821 
822   Options Database:
823 .   -mat_mkl_cpardiso_<icntl> <ival>
824 
825    Level: beginner
826 
827    References: Mkl_Pardiso Users' Guide
828 
829 .seealso: MatGetFactor()
830 @*/
831 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
832 {
833   PetscErrorCode ierr;
834 
835   PetscFunctionBegin;
836   ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
837   PetscFunctionReturn(0);
838 }
839 
840 #undef __FUNCT__
841 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_cpardiso"
842 static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type){
843   PetscFunctionBegin;
844   *type = MATSOLVERMKL_CPARDISO;
845   PetscFunctionReturn(0);
846 }
847 
848 /* MatGetFactor for MPI AIJ matrices */
849 #undef __FUNCT__
850 #define __FUNCT__ "MatGetFactor_mpiaij_mkl_cpardiso"
851 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F){
852   Mat            B;
853   PetscErrorCode ierr;
854   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
855   PetscBool      isSeqAIJ;
856 
857   PetscFunctionBegin;
858   /* Create the factorization matrix */
859 
860   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
861   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
862   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
863   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
864 
865   ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr);
866 
867   if (isSeqAIJ) {
868     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
869   mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
870   } else {
871     mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
872     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
873   }
874 
875   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
876   B->ops->destroy = MatDestroy_MKL_CPARDISO;
877 
878   B->ops->view    = MatView_MKL_CPARDISO;
879   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
880 
881   B->factortype   = ftype;
882   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
883 
884   B->spptr = mat_mkl_cpardiso;
885 
886   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);CHKERRQ(ierr);
887   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr);
888   ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr);
889 
890   *F = B;
891   PetscFunctionReturn(0);
892 }
893 
894 #undef __FUNCT__
895 #define __FUNCT__ "MatSolverPackageRegister_MKL_CPardiso"
896 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void)
897 {
898   PetscErrorCode ierr;
899   PetscFunctionBegin;
900   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
901   ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904