1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <math.h> 11 #include <mkl.h> 12 #include <mkl_cluster_sparse_solver.h> 13 14 /* 15 * Possible mkl_cpardiso phases that controls the execution of the solver. 16 * For more information check mkl_cpardiso manual. 17 */ 18 #define JOB_ANALYSIS 11 19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 21 #define JOB_NUMERICAL_FACTORIZATION 22 22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 27 #define JOB_RELEASE_OF_LU_MEMORY 0 28 #define JOB_RELEASE_OF_ALL_MEMORY -1 29 30 #define IPARM_SIZE 64 31 #define INT_TYPE MKL_INT 32 33 static const char *Err_MSG_CPardiso(int errNo){ 34 switch (errNo) { 35 case -1: 36 return "input inconsistent"; break; 37 case -2: 38 return "not enough memory"; break; 39 case -3: 40 return "reordering problem"; break; 41 case -4: 42 return "zero pivot, numerical factorization or iterative refinement problem"; break; 43 case -5: 44 return "unclassified (internal) error"; break; 45 case -6: 46 return "preordering failed (matrix types 11, 13 only)"; break; 47 case -7: 48 return "diagonal matrix problem"; break; 49 case -8: 50 return "32-bit integer overflow problem"; break; 51 case -9: 52 return "not enough memory for OOC"; break; 53 case -10: 54 return "problems with opening OOC temporary files"; break; 55 case -11: 56 return "read/write problems with the OOC data file"; break; 57 default : 58 return "unknown error"; 59 } 60 } 61 62 /* 63 * Internal data structure. 64 * For more information check mkl_cpardiso manual. 65 */ 66 67 typedef struct { 68 69 /* Configuration vector */ 70 INT_TYPE iparm[IPARM_SIZE]; 71 72 /* 73 * Internal mkl_cpardiso memory location. 74 * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak. 75 */ 76 void *pt[IPARM_SIZE]; 77 78 MPI_Comm comm_mkl_cpardiso; 79 80 /* Basic mkl_cpardiso info*/ 81 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 82 83 /* Matrix structure */ 84 PetscScalar *a; 85 86 INT_TYPE *ia, *ja; 87 88 /* Number of non-zero elements */ 89 INT_TYPE nz; 90 91 /* Row permutaton vector*/ 92 INT_TYPE *perm; 93 94 /* Define is matrix preserve sparce structure. */ 95 MatStructure matstruc; 96 97 PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**); 98 99 /* True if mkl_cpardiso function have been used. */ 100 PetscBool CleanUp; 101 } Mat_MKL_CPARDISO; 102 103 /* 104 * Copy the elements of matrix A. 105 * Input: 106 * - Mat A: MATSEQAIJ matrix 107 * - int shift: matrix index. 108 * - 0 for c representation 109 * - 1 for fortran representation 110 * - MatReuse reuse: 111 * - MAT_INITIAL_MATRIX: Create a new aij representation 112 * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values 113 * Output: 114 * - int *nnz: Number of nonzero-elements. 115 * - int **r pointer to i index 116 * - int **c pointer to j elements 117 * - MATRIXTYPE **v: Non-zero elements 118 */ 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatCopy_seqaij_seqaij_MKL_CPARDISO" 121 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 122 { 123 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 124 125 PetscFunctionBegin; 126 *v=aa->a; 127 if (reuse == MAT_INITIAL_MATRIX) { 128 *r = (INT_TYPE*)aa->i; 129 *c = (INT_TYPE*)aa->j; 130 *nnz = aa->nz; 131 } 132 PetscFunctionReturn(0); 133 } 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO" 137 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 138 { 139 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 140 PetscErrorCode ierr; 141 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 142 PetscInt *row,*col; 143 const PetscScalar *av, *bv,*v1,*v2; 144 PetscScalar *val; 145 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 146 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 147 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 148 PetscInt nn, colA_start,jB,jcol; 149 150 PetscFunctionBegin; 151 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 152 av=aa->a; bv=bb->a; 153 154 garray = mat->garray; 155 156 if (reuse == MAT_INITIAL_MATRIX) { 157 nz = aa->nz + bb->nz; 158 *nnz = nz; 159 ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr); 160 col = row + m + 1; 161 val = (PetscScalar*)(col + nz); 162 *r = row; *c = col; *v = val; 163 row[0] = 0; 164 } else { 165 row = *r; col = *c; val = *v; 166 } 167 168 nz = 0; 169 for (i=0; i<m; i++) { 170 row[i] = nz; 171 countA = ai[i+1] - ai[i]; 172 countB = bi[i+1] - bi[i]; 173 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 174 bjj = bj + bi[i]; 175 176 /* B part, smaller col index */ 177 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 178 jB = 0; 179 for (j=0; j<countB; j++) { 180 jcol = garray[bjj[j]]; 181 if (jcol > colA_start) { 182 jB = j; 183 break; 184 } 185 col[nz] = jcol; 186 val[nz++] = *bv++; 187 if (j==countB-1) jB = countB; 188 } 189 190 /* A part */ 191 for (j=0; j<countA; j++) { 192 col[nz] = rstart + ajj[j]; 193 val[nz++] = *av++; 194 } 195 196 /* B part, larger col index */ 197 for (j=jB; j<countB; j++) { 198 col[nz] = garray[bjj[j]]; 199 val[nz++] = *bv++; 200 } 201 } 202 row[m] = nz; 203 204 PetscFunctionReturn(0); 205 } 206 207 /* 208 * Free memory for Mat_MKL_CPARDISO structure and pointers to objects. 209 */ 210 #undef __FUNCT__ 211 #define __FUNCT__ "MatDestroy_MKL_CPARDISO" 212 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A) 213 { 214 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 /* Terminate instance, deallocate memories */ 219 if (mat_mkl_cpardiso->CleanUp) { 220 mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 221 222 cluster_sparse_solver ( 223 mat_mkl_cpardiso->pt, 224 &mat_mkl_cpardiso->maxfct, 225 &mat_mkl_cpardiso->mnum, 226 &mat_mkl_cpardiso->mtype, 227 &mat_mkl_cpardiso->phase, 228 &mat_mkl_cpardiso->n, 229 NULL, 230 NULL, 231 NULL, 232 mat_mkl_cpardiso->perm, 233 &mat_mkl_cpardiso->nrhs, 234 mat_mkl_cpardiso->iparm, 235 &mat_mkl_cpardiso->msglvl, 236 NULL, 237 NULL, 238 &mat_mkl_cpardiso->comm_mkl_cpardiso, 239 &mat_mkl_cpardiso->err); 240 } 241 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 242 ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 243 244 /* clear composed functions */ 245 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr); 246 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 * Computes Ax = b 252 */ 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatSolve_MKL_CPARDISO" 255 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x) 256 { 257 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr; 258 PetscErrorCode ierr; 259 PetscScalar *barray, *xarray; 260 261 PetscFunctionBegin; 262 mat_mkl_cpardiso->nrhs = 1; 263 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 264 ierr = VecGetArray(b,&barray);CHKERRQ(ierr); 265 266 /* solve phase */ 267 /*-------------*/ 268 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 269 cluster_sparse_solver ( 270 mat_mkl_cpardiso->pt, 271 &mat_mkl_cpardiso->maxfct, 272 &mat_mkl_cpardiso->mnum, 273 &mat_mkl_cpardiso->mtype, 274 &mat_mkl_cpardiso->phase, 275 &mat_mkl_cpardiso->n, 276 mat_mkl_cpardiso->a, 277 mat_mkl_cpardiso->ia, 278 mat_mkl_cpardiso->ja, 279 mat_mkl_cpardiso->perm, 280 &mat_mkl_cpardiso->nrhs, 281 mat_mkl_cpardiso->iparm, 282 &mat_mkl_cpardiso->msglvl, 283 (void*)barray, 284 (void*)xarray, 285 &mat_mkl_cpardiso->comm_mkl_cpardiso, 286 &mat_mkl_cpardiso->err); 287 288 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 289 290 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 291 PetscFunctionReturn(0); 292 } 293 294 #undef __FUNCT__ 295 #define __FUNCT__ "MatSolveTranspose_MKL_CPARDISO" 296 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x) 297 { 298 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr; 299 PetscErrorCode ierr; 300 301 PetscFunctionBegin; 302 #if defined(PETSC_USE_COMPLEX) 303 mat_mkl_cpardiso->iparm[12 - 1] = 1; 304 #else 305 mat_mkl_cpardiso->iparm[12 - 1] = 2; 306 #endif 307 ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr); 308 mat_mkl_cpardiso->iparm[12 - 1] = 0; 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatMatSolve_MKL_CPARDISO" 314 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X) 315 { 316 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr; 317 PetscErrorCode ierr; 318 PetscScalar *barray, *xarray; 319 PetscBool flg; 320 321 PetscFunctionBegin; 322 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr); 323 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix"); 324 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr); 325 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix"); 326 327 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 328 329 if(mat_mkl_cpardiso->nrhs > 0){ 330 ierr = MatDenseGetArray(B,&barray); 331 ierr = MatDenseGetArray(X,&xarray); 332 333 /* solve phase */ 334 /*-------------*/ 335 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 336 cluster_sparse_solver ( 337 mat_mkl_cpardiso->pt, 338 &mat_mkl_cpardiso->maxfct, 339 &mat_mkl_cpardiso->mnum, 340 &mat_mkl_cpardiso->mtype, 341 &mat_mkl_cpardiso->phase, 342 &mat_mkl_cpardiso->n, 343 mat_mkl_cpardiso->a, 344 mat_mkl_cpardiso->ia, 345 mat_mkl_cpardiso->ja, 346 mat_mkl_cpardiso->perm, 347 &mat_mkl_cpardiso->nrhs, 348 mat_mkl_cpardiso->iparm, 349 &mat_mkl_cpardiso->msglvl, 350 (void*)barray, 351 (void*)xarray, 352 &mat_mkl_cpardiso->comm_mkl_cpardiso, 353 &mat_mkl_cpardiso->err); 354 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 355 } 356 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 357 PetscFunctionReturn(0); 358 359 } 360 361 /* 362 * LU Decomposition 363 */ 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatFactorNumeric_MKL_CPARDISO" 366 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info) 367 { 368 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->spptr; 369 PetscErrorCode ierr; 370 371 /* numerical factorization phase */ 372 /*-------------------------------*/ 373 374 PetscFunctionBegin; 375 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 376 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 377 378 /* numerical factorization phase */ 379 /*-------------------------------*/ 380 mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION; 381 cluster_sparse_solver ( 382 mat_mkl_cpardiso->pt, 383 &mat_mkl_cpardiso->maxfct, 384 &mat_mkl_cpardiso->mnum, 385 &mat_mkl_cpardiso->mtype, 386 &mat_mkl_cpardiso->phase, 387 &mat_mkl_cpardiso->n, 388 mat_mkl_cpardiso->a, 389 mat_mkl_cpardiso->ia, 390 mat_mkl_cpardiso->ja, 391 mat_mkl_cpardiso->perm, 392 &mat_mkl_cpardiso->nrhs, 393 mat_mkl_cpardiso->iparm, 394 &mat_mkl_cpardiso->msglvl, 395 NULL, 396 NULL, 397 &mat_mkl_cpardiso->comm_mkl_cpardiso, 398 &mat_mkl_cpardiso->err); 399 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 400 401 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 402 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 403 PetscFunctionReturn(0); 404 } 405 406 /* Sets mkl_cpardiso options from the options database */ 407 #undef __FUNCT__ 408 #define __FUNCT__ "PetscSetMKL_CPARDISOFromOptions" 409 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A) 410 { 411 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr; 412 PetscErrorCode ierr; 413 PetscInt icntl; 414 PetscBool flg; 415 int pt[IPARM_SIZE], threads; 416 417 PetscFunctionBegin; 418 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr); 419 ierr = PetscOptionsInt("-mat_mkl_cpardiso_65", 420 "Number of thrads to use", 421 "None", 422 threads, 423 &threads, 424 &flg);CHKERRQ(ierr); 425 if (flg) mkl_set_num_threads(threads); 426 427 ierr = PetscOptionsInt("-mat_mkl_cpardiso_66", 428 "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time", 429 "None", 430 mat_mkl_cpardiso->maxfct, 431 &icntl, 432 &flg);CHKERRQ(ierr); 433 if (flg) mat_mkl_cpardiso->maxfct = icntl; 434 435 ierr = PetscOptionsInt("-mat_mkl_cpardiso_67", 436 "Indicates the actual matrix for the solution phase", 437 "None", 438 mat_mkl_cpardiso->mnum, 439 &icntl, 440 &flg);CHKERRQ(ierr); 441 if (flg) mat_mkl_cpardiso->mnum = icntl; 442 443 ierr = PetscOptionsInt("-mat_mkl_cpardiso_68", 444 "Message level information", 445 "None", 446 mat_mkl_cpardiso->msglvl, 447 &icntl, 448 &flg);CHKERRQ(ierr); 449 if (flg) mat_mkl_cpardiso->msglvl = icntl; 450 451 ierr = PetscOptionsInt("-mat_mkl_cpardiso_69", 452 "Defines the matrix type", 453 "None", 454 mat_mkl_cpardiso->mtype, 455 &icntl, 456 &flg);CHKERRQ(ierr); 457 if(flg){ 458 mat_mkl_cpardiso->mtype = icntl; 459 #if defined(PETSC_USE_REAL_SINGLE) 460 mat_mkl_cpardiso->iparm[27] = 1; 461 #else 462 mat_mkl_cpardiso->iparm[27] = 0; 463 #endif 464 mat_mkl_cpardiso->iparm[34] = 1; 465 } 466 ierr = PetscOptionsInt("-mat_mkl_cpardiso_1", 467 "Use default values", 468 "None", 469 mat_mkl_cpardiso->iparm[0], 470 &icntl, 471 &flg);CHKERRQ(ierr); 472 473 if(flg && icntl != 0){ 474 ierr = PetscOptionsInt("-mat_mkl_cpardiso_2", 475 "Fill-in reducing ordering for the input matrix", 476 "None", 477 mat_mkl_cpardiso->iparm[1], 478 &icntl, 479 &flg);CHKERRQ(ierr); 480 if (flg) mat_mkl_cpardiso->iparm[1] = icntl; 481 482 ierr = PetscOptionsInt("-mat_mkl_cpardiso_4", 483 "Preconditioned CGS/CG", 484 "None", 485 mat_mkl_cpardiso->iparm[3], 486 &icntl, 487 &flg);CHKERRQ(ierr); 488 if (flg) mat_mkl_cpardiso->iparm[3] = icntl; 489 490 ierr = PetscOptionsInt("-mat_mkl_cpardiso_5", 491 "User permutation", 492 "None", 493 mat_mkl_cpardiso->iparm[4], 494 &icntl, 495 &flg);CHKERRQ(ierr); 496 if (flg) mat_mkl_cpardiso->iparm[4] = icntl; 497 498 ierr = PetscOptionsInt("-mat_mkl_cpardiso_6", 499 "Write solution on x", 500 "None", 501 mat_mkl_cpardiso->iparm[5], 502 &icntl, 503 &flg);CHKERRQ(ierr); 504 if (flg) mat_mkl_cpardiso->iparm[5] = icntl; 505 506 ierr = PetscOptionsInt("-mat_mkl_cpardiso_8", 507 "Iterative refinement step", 508 "None", 509 mat_mkl_cpardiso->iparm[7], 510 &icntl, 511 &flg);CHKERRQ(ierr); 512 if (flg) mat_mkl_cpardiso->iparm[7] = icntl; 513 514 ierr = PetscOptionsInt("-mat_mkl_cpardiso_10", 515 "Pivoting perturbation", 516 "None", 517 mat_mkl_cpardiso->iparm[9], 518 &icntl, 519 &flg);CHKERRQ(ierr); 520 if (flg) mat_mkl_cpardiso->iparm[9] = icntl; 521 522 ierr = PetscOptionsInt("-mat_mkl_cpardiso_11", 523 "Scaling vectors", 524 "None", 525 mat_mkl_cpardiso->iparm[10], 526 &icntl, 527 &flg);CHKERRQ(ierr); 528 if (flg) mat_mkl_cpardiso->iparm[10] = icntl; 529 530 ierr = PetscOptionsInt("-mat_mkl_cpardiso_12", 531 "Solve with transposed or conjugate transposed matrix A", 532 "None", 533 mat_mkl_cpardiso->iparm[11], 534 &icntl, 535 &flg);CHKERRQ(ierr); 536 if (flg) mat_mkl_cpardiso->iparm[11] = icntl; 537 538 ierr = PetscOptionsInt("-mat_mkl_cpardiso_13", 539 "Improved accuracy using (non-) symmetric weighted matching", 540 "None", 541 mat_mkl_cpardiso->iparm[12], 542 &icntl, 543 &flg);CHKERRQ(ierr); 544 if (flg) mat_mkl_cpardiso->iparm[12] = icntl; 545 546 ierr = PetscOptionsInt("-mat_mkl_cpardiso_18", 547 "Numbers of non-zero elements", 548 "None", 549 mat_mkl_cpardiso->iparm[17], 550 &icntl, 551 &flg);CHKERRQ(ierr); 552 if (flg) mat_mkl_cpardiso->iparm[17] = icntl; 553 554 ierr = PetscOptionsInt("-mat_mkl_cpardiso_19", 555 "Report number of floating point operations", 556 "None", 557 mat_mkl_cpardiso->iparm[18], 558 &icntl, 559 &flg);CHKERRQ(ierr); 560 if (flg) mat_mkl_cpardiso->iparm[18] = icntl; 561 562 ierr = PetscOptionsInt("-mat_mkl_cpardiso_21", 563 "Pivoting for symmetric indefinite matrices", 564 "None", 565 mat_mkl_cpardiso->iparm[20], 566 &icntl, 567 &flg);CHKERRQ(ierr); 568 if (flg) mat_mkl_cpardiso->iparm[20] = icntl; 569 570 ierr = PetscOptionsInt("-mat_mkl_cpardiso_24", 571 "Parallel factorization control", 572 "None", 573 mat_mkl_cpardiso->iparm[23], 574 &icntl, 575 &flg);CHKERRQ(ierr); 576 if (flg) mat_mkl_cpardiso->iparm[23] = icntl; 577 578 ierr = PetscOptionsInt("-mat_mkl_cpardiso_25", 579 "Parallel forward/backward solve control", 580 "None", 581 mat_mkl_cpardiso->iparm[24], 582 &icntl, 583 &flg);CHKERRQ(ierr); 584 if (flg) mat_mkl_cpardiso->iparm[24] = icntl; 585 586 ierr = PetscOptionsInt("-mat_mkl_cpardiso_27", 587 "Matrix checker", 588 "None", 589 mat_mkl_cpardiso->iparm[26], 590 &icntl, 591 &flg);CHKERRQ(ierr); 592 if (flg) mat_mkl_cpardiso->iparm[26] = icntl; 593 594 ierr = PetscOptionsInt("-mat_mkl_cpardiso_31", 595 "Partial solve and computing selected components of the solution vectors", 596 "None", 597 mat_mkl_cpardiso->iparm[30], 598 &icntl, 599 &flg);CHKERRQ(ierr); 600 if (flg) mat_mkl_cpardiso->iparm[30] = icntl; 601 602 ierr = PetscOptionsInt("-mat_mkl_cpardiso_34", 603 "Optimal number of threads for conditional numerical reproducibility (CNR) mode", 604 "None", 605 mat_mkl_cpardiso->iparm[33], 606 &icntl, 607 &flg);CHKERRQ(ierr); 608 if (flg) mat_mkl_cpardiso->iparm[33] = icntl; 609 610 ierr = PetscOptionsInt("-mat_mkl_cpardiso_60", 611 "Intel MKL_CPARDISO mode", 612 "None", 613 mat_mkl_cpardiso->iparm[59], 614 &icntl, 615 &flg);CHKERRQ(ierr); 616 if (flg) mat_mkl_cpardiso->iparm[59] = icntl; 617 } 618 619 PetscOptionsEnd(); 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "PetscInitialize_MKL_CPARDISO" 625 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso) 626 { 627 PetscErrorCode ierr; 628 PetscInt i; 629 PetscMPIInt size; 630 631 PetscFunctionBegin; 632 633 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 634 ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr); 635 636 mat_mkl_cpardiso->CleanUp = PETSC_FALSE; 637 mat_mkl_cpardiso->maxfct = 1; 638 mat_mkl_cpardiso->mnum = 1; 639 mat_mkl_cpardiso->n = A->rmap->N; 640 mat_mkl_cpardiso->msglvl = 0; 641 mat_mkl_cpardiso->nrhs = 1; 642 mat_mkl_cpardiso->err = 0; 643 mat_mkl_cpardiso->phase = -1; 644 #if defined(PETSC_USE_COMPLEX) 645 mat_mkl_cpardiso->mtype = 13; 646 #else 647 mat_mkl_cpardiso->mtype = 11; 648 #endif 649 650 #if defined(PETSC_USE_REAL_SINGLE) 651 mat_mkl_cpardiso->iparm[27] = 1; 652 #else 653 mat_mkl_cpardiso->iparm[27] = 0; 654 #endif 655 656 mat_mkl_cpardiso->iparm[34] = 1; /* C style */ 657 658 mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 659 mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */ 660 mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */ 661 mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */ 662 mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 663 mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 664 mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 665 mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 666 mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 667 mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */ 668 669 mat_mkl_cpardiso->iparm[39] = 0; 670 if (size > 1) { 671 mat_mkl_cpardiso->iparm[39] = 2; 672 mat_mkl_cpardiso->iparm[40] = A->rmap->rstart; 673 mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1; 674 } 675 mat_mkl_cpardiso->perm = 0; 676 PetscFunctionReturn(0); 677 } 678 679 /* 680 * Symbolic decomposition. Mkl_Pardiso analysis phase. 681 */ 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_CPARDISO" 684 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 685 { 686 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr; 687 PetscErrorCode ierr; 688 689 PetscFunctionBegin; 690 mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 691 692 /* Set MKL_CPARDISO options from the options database */ 693 ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr); 694 695 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 696 697 mat_mkl_cpardiso->n = A->rmap->N; 698 699 /* analysis phase */ 700 /*----------------*/ 701 mat_mkl_cpardiso->phase = JOB_ANALYSIS; 702 703 cluster_sparse_solver ( 704 mat_mkl_cpardiso->pt, 705 &mat_mkl_cpardiso->maxfct, 706 &mat_mkl_cpardiso->mnum, 707 &mat_mkl_cpardiso->mtype, 708 &mat_mkl_cpardiso->phase, 709 &mat_mkl_cpardiso->n, 710 mat_mkl_cpardiso->a, 711 mat_mkl_cpardiso->ia, 712 mat_mkl_cpardiso->ja, 713 mat_mkl_cpardiso->perm, 714 &mat_mkl_cpardiso->nrhs, 715 mat_mkl_cpardiso->iparm, 716 &mat_mkl_cpardiso->msglvl, 717 NULL, 718 NULL, 719 &mat_mkl_cpardiso->comm_mkl_cpardiso, 720 &mat_mkl_cpardiso->err); 721 722 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 723 724 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 725 F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO; 726 F->ops->solve = MatSolve_MKL_CPARDISO; 727 F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO; 728 F->ops->matsolve = MatMatSolve_MKL_CPARDISO; 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatView_MKL_CPARDISO" 734 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer) 735 { 736 PetscErrorCode ierr; 737 PetscBool iascii; 738 PetscViewerFormat format; 739 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr; 740 PetscInt i; 741 742 PetscFunctionBegin; 743 /* check if matrix is mkl_cpardiso type */ 744 if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0); 745 746 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 747 if (iascii) { 748 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 749 if (format == PETSC_VIEWER_ASCII_INFO) { 750 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr); 751 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr); 752 for(i = 1; i <= 64; i++){ 753 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr); 754 } 755 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr); 756 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr); 757 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr); 758 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr); 759 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 760 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr); 761 } 762 } 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNCT__ 767 #define __FUNCT__ "MatGetInfo_MKL_CPARDISO" 768 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info) 769 { 770 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->spptr; 771 772 PetscFunctionBegin; 773 info->block_size = 1.0; 774 info->nz_allocated = mat_mkl_cpardiso->nz + 0.0; 775 info->nz_unneeded = 0.0; 776 info->assemblies = 0.0; 777 info->mallocs = 0.0; 778 info->memory = 0.0; 779 info->fill_ratio_given = 0; 780 info->fill_ratio_needed = 0; 781 info->factor_mallocs = 0; 782 PetscFunctionReturn(0); 783 } 784 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatMkl_CPardisoSetCntl_MKL_CPARDISO" 787 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival) 788 { 789 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->spptr; 790 791 PetscFunctionBegin; 792 if(icntl <= 64){ 793 mat_mkl_cpardiso->iparm[icntl - 1] = ival; 794 } else { 795 if(icntl == 65) 796 mkl_set_num_threads((int)ival); 797 else if(icntl == 66) 798 mat_mkl_cpardiso->maxfct = ival; 799 else if(icntl == 67) 800 mat_mkl_cpardiso->mnum = ival; 801 else if(icntl == 68) 802 mat_mkl_cpardiso->msglvl = ival; 803 else if(icntl == 69){ 804 int pt[IPARM_SIZE]; 805 mat_mkl_cpardiso->mtype = ival; 806 #if defined(PETSC_USE_REAL_SINGLE) 807 mat_mkl_cpardiso->iparm[27] = 1; 808 #else 809 mat_mkl_cpardiso->iparm[27] = 0; 810 #endif 811 mat_mkl_cpardiso->iparm[34] = 1; 812 } 813 } 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatMkl_CPardisoSetCntl" 819 /*@ 820 MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters 821 822 Logically Collective on Mat 823 824 Input Parameters: 825 + F - the factored matrix obtained by calling MatGetFactor() 826 . icntl - index of Mkl_Pardiso parameter 827 - ival - value of Mkl_Pardiso parameter 828 829 Options Database: 830 . -mat_mkl_cpardiso_<icntl> <ival> 831 832 Level: beginner 833 834 References: Mkl_Pardiso Users' Guide 835 836 .seealso: MatGetFactor() 837 @*/ 838 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 839 { 840 PetscErrorCode ierr; 841 842 PetscFunctionBegin; 843 ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 844 PetscFunctionReturn(0); 845 } 846 847 #undef __FUNCT__ 848 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_cpardiso" 849 static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type) 850 { 851 PetscFunctionBegin; 852 *type = MATSOLVERMKL_CPARDISO; 853 PetscFunctionReturn(0); 854 } 855 856 /* MatGetFactor for MPI AIJ matrices */ 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatGetFactor_mpiaij_mkl_cpardiso" 859 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F) 860 { 861 Mat B; 862 PetscErrorCode ierr; 863 Mat_MKL_CPARDISO *mat_mkl_cpardiso; 864 PetscBool isSeqAIJ; 865 866 PetscFunctionBegin; 867 /* Create the factorization matrix */ 868 869 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 870 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 871 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 872 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 873 874 ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr); 875 876 if (isSeqAIJ) { 877 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 878 mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO; 879 } else { 880 mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO; 881 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 882 } 883 884 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO; 885 B->ops->destroy = MatDestroy_MKL_CPARDISO; 886 887 B->ops->view = MatView_MKL_CPARDISO; 888 B->ops->getinfo = MatGetInfo_MKL_CPARDISO; 889 890 B->factortype = ftype; 891 B->assembled = PETSC_TRUE; /* required by -ksp_view */ 892 893 B->spptr = mat_mkl_cpardiso; 894 895 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);CHKERRQ(ierr); 896 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr); 897 ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr); 898 899 *F = B; 900 PetscFunctionReturn(0); 901 } 902 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatSolverPackageRegister_MKL_CPardiso" 905 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void) 906 { 907 PetscErrorCode ierr; 908 909 PetscFunctionBegin; 910 ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 911 ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 912 PetscFunctionReturn(0); 913 } 914