1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <math.h> 11 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 12 #define MKL_ILP64 13 #endif 14 #include <mkl.h> 15 #include <mkl_cluster_sparse_solver.h> 16 17 /* 18 * Possible mkl_cpardiso phases that controls the execution of the solver. 19 * For more information check mkl_cpardiso manual. 20 */ 21 #define JOB_ANALYSIS 11 22 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 23 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 24 #define JOB_NUMERICAL_FACTORIZATION 22 25 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 26 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 27 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 28 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 29 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 30 #define JOB_RELEASE_OF_LU_MEMORY 0 31 #define JOB_RELEASE_OF_ALL_MEMORY -1 32 33 #define IPARM_SIZE 64 34 #define INT_TYPE MKL_INT 35 36 static const char *Err_MSG_CPardiso(int errNo){ 37 switch (errNo) { 38 case -1: 39 return "input inconsistent"; break; 40 case -2: 41 return "not enough memory"; break; 42 case -3: 43 return "reordering problem"; break; 44 case -4: 45 return "zero pivot, numerical factorization or iterative refinement problem"; break; 46 case -5: 47 return "unclassified (internal) error"; break; 48 case -6: 49 return "preordering failed (matrix types 11, 13 only)"; break; 50 case -7: 51 return "diagonal matrix problem"; break; 52 case -8: 53 return "32-bit integer overflow problem"; break; 54 case -9: 55 return "not enough memory for OOC"; break; 56 case -10: 57 return "problems with opening OOC temporary files"; break; 58 case -11: 59 return "read/write problems with the OOC data file"; break; 60 default : 61 return "unknown error"; 62 } 63 } 64 65 /* 66 * Internal data structure. 67 * For more information check mkl_cpardiso manual. 68 */ 69 70 typedef struct { 71 72 /* Configuration vector */ 73 INT_TYPE iparm[IPARM_SIZE]; 74 75 /* 76 * Internal mkl_cpardiso memory location. 77 * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak. 78 */ 79 void *pt[IPARM_SIZE]; 80 81 MPI_Comm comm_mkl_cpardiso; 82 83 /* Basic mkl_cpardiso info*/ 84 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 85 86 /* Matrix structure */ 87 PetscScalar *a; 88 89 INT_TYPE *ia, *ja; 90 91 /* Number of non-zero elements */ 92 INT_TYPE nz; 93 94 /* Row permutaton vector*/ 95 INT_TYPE *perm; 96 97 /* Define is matrix preserve sparce structure. */ 98 MatStructure matstruc; 99 100 PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**); 101 102 /* True if mkl_cpardiso function have been used. */ 103 PetscBool CleanUp; 104 } Mat_MKL_CPARDISO; 105 106 /* 107 * Copy the elements of matrix A. 108 * Input: 109 * - Mat A: MATSEQAIJ matrix 110 * - int shift: matrix index. 111 * - 0 for c representation 112 * - 1 for fortran representation 113 * - MatReuse reuse: 114 * - MAT_INITIAL_MATRIX: Create a new aij representation 115 * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values 116 * Output: 117 * - int *nnz: Number of nonzero-elements. 118 * - int **r pointer to i index 119 * - int **c pointer to j elements 120 * - MATRIXTYPE **v: Non-zero elements 121 */ 122 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 123 { 124 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 125 126 PetscFunctionBegin; 127 *v=aa->a; 128 if (reuse == MAT_INITIAL_MATRIX) { 129 *r = (INT_TYPE*)aa->i; 130 *c = (INT_TYPE*)aa->j; 131 *nnz = aa->nz; 132 } 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 137 { 138 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 139 PetscErrorCode ierr; 140 PetscInt rstart,nz,i,j,countA,countB; 141 PetscInt *row,*col; 142 const PetscScalar *av, *bv; 143 PetscScalar *val; 144 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 145 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 146 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 147 PetscInt colA_start,jB,jcol; 148 149 PetscFunctionBegin; 150 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 151 av=aa->a; bv=bb->a; 152 153 garray = mat->garray; 154 155 if (reuse == MAT_INITIAL_MATRIX) { 156 nz = aa->nz + bb->nz; 157 *nnz = nz; 158 ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr); 159 col = row + m + 1; 160 val = (PetscScalar*)(col + nz); 161 *r = row; *c = col; *v = val; 162 row[0] = 0; 163 } else { 164 row = *r; col = *c; val = *v; 165 } 166 167 nz = 0; 168 for (i=0; i<m; i++) { 169 row[i] = nz; 170 countA = ai[i+1] - ai[i]; 171 countB = bi[i+1] - bi[i]; 172 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 173 bjj = bj + bi[i]; 174 175 /* B part, smaller col index */ 176 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 177 jB = 0; 178 for (j=0; j<countB; j++) { 179 jcol = garray[bjj[j]]; 180 if (jcol > colA_start) { 181 jB = j; 182 break; 183 } 184 col[nz] = jcol; 185 val[nz++] = *bv++; 186 if (j==countB-1) jB = countB; 187 } 188 189 /* A part */ 190 for (j=0; j<countA; j++) { 191 col[nz] = rstart + ajj[j]; 192 val[nz++] = *av++; 193 } 194 195 /* B part, larger col index */ 196 for (j=jB; j<countB; j++) { 197 col[nz] = garray[bjj[j]]; 198 val[nz++] = *bv++; 199 } 200 } 201 row[m] = nz; 202 203 PetscFunctionReturn(0); 204 } 205 206 /* 207 * Free memory for Mat_MKL_CPARDISO structure and pointers to objects. 208 */ 209 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A) 210 { 211 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 212 PetscErrorCode ierr; 213 214 PetscFunctionBegin; 215 /* Terminate instance, deallocate memories */ 216 if (mat_mkl_cpardiso->CleanUp) { 217 mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 218 219 cluster_sparse_solver ( 220 mat_mkl_cpardiso->pt, 221 &mat_mkl_cpardiso->maxfct, 222 &mat_mkl_cpardiso->mnum, 223 &mat_mkl_cpardiso->mtype, 224 &mat_mkl_cpardiso->phase, 225 &mat_mkl_cpardiso->n, 226 NULL, 227 NULL, 228 NULL, 229 mat_mkl_cpardiso->perm, 230 &mat_mkl_cpardiso->nrhs, 231 mat_mkl_cpardiso->iparm, 232 &mat_mkl_cpardiso->msglvl, 233 NULL, 234 NULL, 235 &mat_mkl_cpardiso->comm_mkl_cpardiso, 236 (int*)&mat_mkl_cpardiso->err); 237 } 238 239 if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) { 240 ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr); 241 } 242 ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 243 ierr = PetscFree(A->data);CHKERRQ(ierr); 244 245 /* clear composed functions */ 246 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 247 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr); 248 PetscFunctionReturn(0); 249 } 250 251 /* 252 * Computes Ax = b 253 */ 254 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x) 255 { 256 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 257 PetscErrorCode ierr; 258 PetscScalar *xarray; 259 const PetscScalar *barray; 260 261 PetscFunctionBegin; 262 mat_mkl_cpardiso->nrhs = 1; 263 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 264 ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 265 266 /* solve phase */ 267 /*-------------*/ 268 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 269 cluster_sparse_solver ( 270 mat_mkl_cpardiso->pt, 271 &mat_mkl_cpardiso->maxfct, 272 &mat_mkl_cpardiso->mnum, 273 &mat_mkl_cpardiso->mtype, 274 &mat_mkl_cpardiso->phase, 275 &mat_mkl_cpardiso->n, 276 mat_mkl_cpardiso->a, 277 mat_mkl_cpardiso->ia, 278 mat_mkl_cpardiso->ja, 279 mat_mkl_cpardiso->perm, 280 &mat_mkl_cpardiso->nrhs, 281 mat_mkl_cpardiso->iparm, 282 &mat_mkl_cpardiso->msglvl, 283 (void*)barray, 284 (void*)xarray, 285 &mat_mkl_cpardiso->comm_mkl_cpardiso, 286 (int*)&mat_mkl_cpardiso->err); 287 288 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 289 290 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 291 ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 292 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 293 PetscFunctionReturn(0); 294 } 295 296 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x) 297 { 298 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 299 PetscErrorCode ierr; 300 301 PetscFunctionBegin; 302 #if defined(PETSC_USE_COMPLEX) 303 mat_mkl_cpardiso->iparm[12 - 1] = 1; 304 #else 305 mat_mkl_cpardiso->iparm[12 - 1] = 2; 306 #endif 307 ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr); 308 mat_mkl_cpardiso->iparm[12 - 1] = 0; 309 PetscFunctionReturn(0); 310 } 311 312 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X) 313 { 314 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 315 PetscErrorCode ierr; 316 PetscScalar *xarray; 317 const PetscScalar *barray; 318 319 PetscFunctionBegin; 320 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 321 322 if(mat_mkl_cpardiso->nrhs > 0){ 323 ierr = MatDenseGetArrayRead(B,&barray); 324 ierr = MatDenseGetArray(X,&xarray); 325 326 /* solve phase */ 327 /*-------------*/ 328 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 329 cluster_sparse_solver ( 330 mat_mkl_cpardiso->pt, 331 &mat_mkl_cpardiso->maxfct, 332 &mat_mkl_cpardiso->mnum, 333 &mat_mkl_cpardiso->mtype, 334 &mat_mkl_cpardiso->phase, 335 &mat_mkl_cpardiso->n, 336 mat_mkl_cpardiso->a, 337 mat_mkl_cpardiso->ia, 338 mat_mkl_cpardiso->ja, 339 mat_mkl_cpardiso->perm, 340 &mat_mkl_cpardiso->nrhs, 341 mat_mkl_cpardiso->iparm, 342 &mat_mkl_cpardiso->msglvl, 343 (void*)barray, 344 (void*)xarray, 345 &mat_mkl_cpardiso->comm_mkl_cpardiso, 346 (int*)&mat_mkl_cpardiso->err); 347 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 348 ierr = MatDenseRestoreArrayRead(B,&barray); 349 ierr = MatDenseRestoreArray(X,&xarray); 350 351 } 352 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 353 PetscFunctionReturn(0); 354 355 } 356 357 /* 358 * LU Decomposition 359 */ 360 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info) 361 { 362 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data; 363 PetscErrorCode ierr; 364 365 PetscFunctionBegin; 366 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 367 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 368 369 mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION; 370 cluster_sparse_solver ( 371 mat_mkl_cpardiso->pt, 372 &mat_mkl_cpardiso->maxfct, 373 &mat_mkl_cpardiso->mnum, 374 &mat_mkl_cpardiso->mtype, 375 &mat_mkl_cpardiso->phase, 376 &mat_mkl_cpardiso->n, 377 mat_mkl_cpardiso->a, 378 mat_mkl_cpardiso->ia, 379 mat_mkl_cpardiso->ja, 380 mat_mkl_cpardiso->perm, 381 &mat_mkl_cpardiso->nrhs, 382 mat_mkl_cpardiso->iparm, 383 &mat_mkl_cpardiso->msglvl, 384 NULL, 385 NULL, 386 &mat_mkl_cpardiso->comm_mkl_cpardiso, 387 &mat_mkl_cpardiso->err); 388 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 389 390 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 391 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 392 PetscFunctionReturn(0); 393 } 394 395 /* Sets mkl_cpardiso options from the options database */ 396 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A) 397 { 398 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 399 PetscErrorCode ierr; 400 PetscInt icntl; 401 PetscBool flg; 402 int threads; 403 404 PetscFunctionBegin; 405 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr); 406 ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr); 407 if (flg) mkl_set_num_threads(threads); 408 409 ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr); 410 if (flg) mat_mkl_cpardiso->maxfct = icntl; 411 412 ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr); 413 if (flg) mat_mkl_cpardiso->mnum = icntl; 414 415 ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr); 416 if (flg) mat_mkl_cpardiso->msglvl = icntl; 417 418 ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr); 419 if(flg){ 420 mat_mkl_cpardiso->mtype = icntl; 421 #if defined(PETSC_USE_REAL_SINGLE) 422 mat_mkl_cpardiso->iparm[27] = 1; 423 #else 424 mat_mkl_cpardiso->iparm[27] = 0; 425 #endif 426 mat_mkl_cpardiso->iparm[34] = 1; 427 } 428 ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr); 429 430 if(flg && icntl != 0){ 431 ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr); 432 if (flg) mat_mkl_cpardiso->iparm[1] = icntl; 433 434 ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr); 435 if (flg) mat_mkl_cpardiso->iparm[3] = icntl; 436 437 ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr); 438 if (flg) mat_mkl_cpardiso->iparm[4] = icntl; 439 440 ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr); 441 if (flg) mat_mkl_cpardiso->iparm[5] = icntl; 442 443 ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr); 444 if (flg) mat_mkl_cpardiso->iparm[7] = icntl; 445 446 ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr); 447 if (flg) mat_mkl_cpardiso->iparm[9] = icntl; 448 449 ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr); 450 if (flg) mat_mkl_cpardiso->iparm[10] = icntl; 451 452 ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr); 453 if (flg) mat_mkl_cpardiso->iparm[11] = icntl; 454 455 ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl, 456 &flg);CHKERRQ(ierr); 457 if (flg) mat_mkl_cpardiso->iparm[12] = icntl; 458 459 ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl, 460 &flg);CHKERRQ(ierr); 461 if (flg) mat_mkl_cpardiso->iparm[17] = icntl; 462 463 ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr); 464 if (flg) mat_mkl_cpardiso->iparm[18] = icntl; 465 466 ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr); 467 if (flg) mat_mkl_cpardiso->iparm[20] = icntl; 468 469 ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr); 470 if (flg) mat_mkl_cpardiso->iparm[23] = icntl; 471 472 ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr); 473 if (flg) mat_mkl_cpardiso->iparm[24] = icntl; 474 475 ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr); 476 if (flg) mat_mkl_cpardiso->iparm[26] = icntl; 477 478 ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr); 479 if (flg) mat_mkl_cpardiso->iparm[30] = icntl; 480 481 ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr); 482 if (flg) mat_mkl_cpardiso->iparm[33] = icntl; 483 484 ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr); 485 if (flg) mat_mkl_cpardiso->iparm[59] = icntl; 486 } 487 488 ierr = PetscOptionsEnd();CHKERRQ(ierr); 489 PetscFunctionReturn(0); 490 } 491 492 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso) 493 { 494 PetscErrorCode ierr; 495 PetscMPIInt size; 496 497 PetscFunctionBegin; 498 499 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 500 ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr); 501 502 mat_mkl_cpardiso->CleanUp = PETSC_FALSE; 503 mat_mkl_cpardiso->maxfct = 1; 504 mat_mkl_cpardiso->mnum = 1; 505 mat_mkl_cpardiso->n = A->rmap->N; 506 mat_mkl_cpardiso->msglvl = 0; 507 mat_mkl_cpardiso->nrhs = 1; 508 mat_mkl_cpardiso->err = 0; 509 mat_mkl_cpardiso->phase = -1; 510 #if defined(PETSC_USE_COMPLEX) 511 mat_mkl_cpardiso->mtype = 13; 512 #else 513 mat_mkl_cpardiso->mtype = 11; 514 #endif 515 516 #if defined(PETSC_USE_REAL_SINGLE) 517 mat_mkl_cpardiso->iparm[27] = 1; 518 #else 519 mat_mkl_cpardiso->iparm[27] = 0; 520 #endif 521 522 mat_mkl_cpardiso->iparm[34] = 1; /* C style */ 523 524 mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 525 mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */ 526 mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */ 527 mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */ 528 mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 529 mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 530 mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 531 mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 532 mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 533 mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */ 534 535 mat_mkl_cpardiso->iparm[39] = 0; 536 if (size > 1) { 537 mat_mkl_cpardiso->iparm[39] = 2; 538 mat_mkl_cpardiso->iparm[40] = A->rmap->rstart; 539 mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1; 540 } 541 mat_mkl_cpardiso->perm = 0; 542 PetscFunctionReturn(0); 543 } 544 545 /* 546 * Symbolic decomposition. Mkl_Pardiso analysis phase. 547 */ 548 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 549 { 550 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 555 556 /* Set MKL_CPARDISO options from the options database */ 557 ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr); 558 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 559 560 mat_mkl_cpardiso->n = A->rmap->N; 561 562 /* analysis phase */ 563 /*----------------*/ 564 mat_mkl_cpardiso->phase = JOB_ANALYSIS; 565 566 cluster_sparse_solver ( 567 mat_mkl_cpardiso->pt, 568 &mat_mkl_cpardiso->maxfct, 569 &mat_mkl_cpardiso->mnum, 570 &mat_mkl_cpardiso->mtype, 571 &mat_mkl_cpardiso->phase, 572 &mat_mkl_cpardiso->n, 573 mat_mkl_cpardiso->a, 574 mat_mkl_cpardiso->ia, 575 mat_mkl_cpardiso->ja, 576 mat_mkl_cpardiso->perm, 577 &mat_mkl_cpardiso->nrhs, 578 mat_mkl_cpardiso->iparm, 579 &mat_mkl_cpardiso->msglvl, 580 NULL, 581 NULL, 582 &mat_mkl_cpardiso->comm_mkl_cpardiso, 583 (int*)&mat_mkl_cpardiso->err); 584 585 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 586 587 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 588 F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO; 589 F->ops->solve = MatSolve_MKL_CPARDISO; 590 F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO; 591 F->ops->matsolve = MatMatSolve_MKL_CPARDISO; 592 PetscFunctionReturn(0); 593 } 594 595 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer) 596 { 597 PetscErrorCode ierr; 598 PetscBool iascii; 599 PetscViewerFormat format; 600 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 601 PetscInt i; 602 603 PetscFunctionBegin; 604 /* check if matrix is mkl_cpardiso type */ 605 if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0); 606 607 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 608 if (iascii) { 609 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 610 if (format == PETSC_VIEWER_ASCII_INFO) { 611 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr); 612 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr); 613 for(i = 1; i <= 64; i++){ 614 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr); 615 } 616 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr); 617 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr); 618 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr); 619 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr); 620 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 621 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr); 622 } 623 } 624 PetscFunctionReturn(0); 625 } 626 627 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info) 628 { 629 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data; 630 631 PetscFunctionBegin; 632 info->block_size = 1.0; 633 info->nz_allocated = mat_mkl_cpardiso->nz + 0.0; 634 info->nz_unneeded = 0.0; 635 info->assemblies = 0.0; 636 info->mallocs = 0.0; 637 info->memory = 0.0; 638 info->fill_ratio_given = 0; 639 info->fill_ratio_needed = 0; 640 info->factor_mallocs = 0; 641 PetscFunctionReturn(0); 642 } 643 644 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival) 645 { 646 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data; 647 648 PetscFunctionBegin; 649 if(icntl <= 64){ 650 mat_mkl_cpardiso->iparm[icntl - 1] = ival; 651 } else { 652 if(icntl == 65) mkl_set_num_threads((int)ival); 653 else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival; 654 else if(icntl == 67) mat_mkl_cpardiso->mnum = ival; 655 else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival; 656 else if(icntl == 69){ 657 mat_mkl_cpardiso->mtype = ival; 658 #if defined(PETSC_USE_REAL_SINGLE) 659 mat_mkl_cpardiso->iparm[27] = 1; 660 #else 661 mat_mkl_cpardiso->iparm[27] = 0; 662 #endif 663 mat_mkl_cpardiso->iparm[34] = 1; 664 } 665 } 666 PetscFunctionReturn(0); 667 } 668 669 /*@ 670 MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters 671 672 Logically Collective on Mat 673 674 Input Parameters: 675 + F - the factored matrix obtained by calling MatGetFactor() 676 . icntl - index of Mkl_Pardiso parameter 677 - ival - value of Mkl_Pardiso parameter 678 679 Options Database: 680 . -mat_mkl_cpardiso_<icntl> <ival> 681 682 Level: Intermediate 683 684 Notes: 685 This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options 686 database approach when working with TS, SNES, or KSP. 687 688 References: 689 . Mkl_Pardiso Users' Guide 690 691 .seealso: MatGetFactor() 692 @*/ 693 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 694 { 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } 701 702 static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type) 703 { 704 PetscFunctionBegin; 705 *type = MATSOLVERMKL_CPARDISO; 706 PetscFunctionReturn(0); 707 } 708 709 /* MatGetFactor for MPI AIJ matrices */ 710 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F) 711 { 712 Mat B; 713 PetscErrorCode ierr; 714 Mat_MKL_CPARDISO *mat_mkl_cpardiso; 715 PetscBool isSeqAIJ; 716 717 PetscFunctionBegin; 718 /* Create the factorization matrix */ 719 720 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 721 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 722 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 723 ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr); 724 ierr = MatSetUp(B);CHKERRQ(ierr); 725 726 ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr); 727 728 if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO; 729 else mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO; 730 731 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO; 732 B->ops->destroy = MatDestroy_MKL_CPARDISO; 733 734 B->ops->view = MatView_MKL_CPARDISO; 735 B->ops->getinfo = MatGetInfo_MKL_CPARDISO; 736 737 B->factortype = ftype; 738 B->assembled = PETSC_TRUE; /* required by -ksp_view */ 739 740 B->data = mat_mkl_cpardiso; 741 742 /* set solvertype */ 743 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 744 ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr); 745 746 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);CHKERRQ(ierr); 747 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr); 748 ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr); 749 750 *F = B; 751 PetscFunctionReturn(0); 752 } 753 754 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void) 755 { 756 PetscErrorCode ierr; 757 758 PetscFunctionBegin; 759 ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 760 ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 761 PetscFunctionReturn(0); 762 } 763