1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <math.h> 11 #include <mkl.h> 12 #include <mkl_cluster_sparse_solver.h> 13 14 /* 15 * Possible mkl_cpardiso phases that controls the execution of the solver. 16 * For more information check mkl_cpardiso manual. 17 */ 18 #define JOB_ANALYSIS 11 19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 21 #define JOB_NUMERICAL_FACTORIZATION 22 22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 27 #define JOB_RELEASE_OF_LU_MEMORY 0 28 #define JOB_RELEASE_OF_ALL_MEMORY -1 29 30 #define IPARM_SIZE 64 31 #define INT_TYPE MKL_INT 32 33 static const char *Err_MSG_CPardiso(int errNo){ 34 switch (errNo) { 35 case -1: 36 return "input inconsistent"; break; 37 case -2: 38 return "not enough memory"; break; 39 case -3: 40 return "reordering problem"; break; 41 case -4: 42 return "zero pivot, numerical factorization or iterative refinement problem"; break; 43 case -5: 44 return "unclassified (internal) error"; break; 45 case -6: 46 return "preordering failed (matrix types 11, 13 only)"; break; 47 case -7: 48 return "diagonal matrix problem"; break; 49 case -8: 50 return "32-bit integer overflow problem"; break; 51 case -9: 52 return "not enough memory for OOC"; break; 53 case -10: 54 return "problems with opening OOC temporary files"; break; 55 case -11: 56 return "read/write problems with the OOC data file"; break; 57 default : 58 return "unknown error"; 59 } 60 } 61 62 /* 63 * Internal data structure. 64 * For more information check mkl_cpardiso manual. 65 */ 66 67 typedef struct { 68 69 /*Configuration vector*/ 70 INT_TYPE iparm[IPARM_SIZE]; 71 72 /* 73 * Internal mkl_cpardiso memory location. 74 * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak. 75 */ 76 void *pt[IPARM_SIZE]; 77 78 MPI_Comm comm_mkl_cpardiso; 79 80 /*Basic mkl_cpardiso info*/ 81 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 82 83 /*Matrix structure*/ 84 PetscScalar *a; 85 86 INT_TYPE *ia, *ja; 87 88 /*Number of non-zero elements*/ 89 INT_TYPE nz; 90 91 /*Row permutaton vector*/ 92 INT_TYPE *perm; 93 94 /*Deffine is matrix preserve sparce structure.*/ 95 MatStructure matstruc; 96 97 PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**); 98 99 /*True if mkl_cpardiso function have been used.*/ 100 PetscBool CleanUp; 101 } Mat_MKL_CPARDISO; 102 103 /* 104 * Copy the elements of matrix A. 105 * Input: 106 * - Mat A: MATSEQAIJ matrix 107 * - int shift: matrix index. 108 * - 0 for c representation 109 * - 1 for fortran representation 110 * - MatReuse reuse: 111 * - MAT_INITIAL_MATRIX: Create a new aij representation 112 * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values 113 * Output: 114 * - int *nnz: Number of nonzero-elements. 115 * - int **r pointer to i index 116 * - int **c pointer to j elements 117 * - MATRIXTYPE **v: Non-zero elements 118 */ 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatCopy_seqaij_seqaij_MKL_CPARDISO" 121 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v){ 122 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 123 PetscFunctionBegin; 124 *v=aa->a; 125 if (reuse == MAT_INITIAL_MATRIX) { 126 *r = (INT_TYPE*)aa->i; 127 *c = (INT_TYPE*)aa->j; 128 *nnz = aa->nz; 129 } 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO" 135 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 136 { 137 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 138 PetscErrorCode ierr; 139 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 140 PetscInt *row,*col; 141 const PetscScalar *av, *bv,*v1,*v2; 142 PetscScalar *val; 143 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 144 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 145 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 146 PetscInt nn, colA_start,jB,jcol; 147 148 PetscFunctionBegin; 149 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 150 av=aa->a; bv=bb->a; 151 152 garray = mat->garray; 153 154 if (reuse == MAT_INITIAL_MATRIX) { 155 nz = aa->nz + bb->nz; 156 *nnz = nz; 157 ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr); 158 col = row + m + 1; 159 val = (PetscScalar*)(col + nz); 160 *r = row; *c = col; *v = val; 161 row[0] = 0; 162 } else { 163 row = *r; col = *c; val = *v; 164 } 165 166 nz = 0; 167 for (i=0; i<m; i++) { 168 row[i] = nz; 169 countA = ai[i+1] - ai[i]; 170 countB = bi[i+1] - bi[i]; 171 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 172 bjj = bj + bi[i]; 173 174 /* B part, smaller col index */ 175 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 176 jB = 0; 177 for (j=0; j<countB; j++) { 178 jcol = garray[bjj[j]]; 179 if (jcol > colA_start) { 180 jB = j; 181 break; 182 } 183 col[nz] = jcol; 184 val[nz++] = *bv++; 185 if (j==countB-1) jB = countB; 186 } 187 188 /* A part */ 189 for (j=0; j<countA; j++) { 190 col[nz] = rstart + ajj[j]; 191 val[nz++] = *av++; 192 } 193 194 /* B part, larger col index */ 195 for (j=jB; j<countB; j++) { 196 col[nz] = garray[bjj[j]]; 197 val[nz++] = *bv++; 198 } 199 } 200 row[m] = nz; 201 202 PetscFunctionReturn(0); 203 } 204 205 /* 206 * Free memory for Mat_MKL_CPARDISO structure and pointers to objects. 207 */ 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatDestroy_MKL_CPARDISO" 210 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A){ 211 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr; 212 PetscErrorCode ierr; 213 214 PetscFunctionBegin; 215 /* Terminate instance, deallocate memories */ 216 if (mat_mkl_cpardiso->CleanUp) { 217 mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 218 219 cluster_sparse_solver ( 220 mat_mkl_cpardiso->pt, 221 &mat_mkl_cpardiso->maxfct, 222 &mat_mkl_cpardiso->mnum, 223 &mat_mkl_cpardiso->mtype, 224 &mat_mkl_cpardiso->phase, 225 &mat_mkl_cpardiso->n, 226 NULL, 227 NULL, 228 NULL, 229 mat_mkl_cpardiso->perm, 230 &mat_mkl_cpardiso->nrhs, 231 mat_mkl_cpardiso->iparm, 232 &mat_mkl_cpardiso->msglvl, 233 NULL, 234 NULL, 235 &mat_mkl_cpardiso->comm_mkl_cpardiso, 236 &mat_mkl_cpardiso->err); 237 } 238 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 239 ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 240 241 /* clear composed functions */ 242 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr); 243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 /* 248 * Computes Ax = b 249 */ 250 #undef __FUNCT__ 251 #define __FUNCT__ "MatSolve_MKL_CPARDISO" 252 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x){ 253 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr; 254 PetscErrorCode ierr; 255 PetscScalar *barray, *xarray; 256 257 PetscFunctionBegin; 258 259 mat_mkl_cpardiso->nrhs = 1; 260 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 261 ierr = VecGetArray(b,&barray);CHKERRQ(ierr); 262 263 /* solve phase */ 264 /*-------------*/ 265 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 266 cluster_sparse_solver ( 267 mat_mkl_cpardiso->pt, 268 &mat_mkl_cpardiso->maxfct, 269 &mat_mkl_cpardiso->mnum, 270 &mat_mkl_cpardiso->mtype, 271 &mat_mkl_cpardiso->phase, 272 &mat_mkl_cpardiso->n, 273 mat_mkl_cpardiso->a, 274 mat_mkl_cpardiso->ia, 275 mat_mkl_cpardiso->ja, 276 mat_mkl_cpardiso->perm, 277 &mat_mkl_cpardiso->nrhs, 278 mat_mkl_cpardiso->iparm, 279 &mat_mkl_cpardiso->msglvl, 280 (void*)barray, 281 (void*)xarray, 282 &mat_mkl_cpardiso->comm_mkl_cpardiso, 283 &mat_mkl_cpardiso->err); 284 285 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 286 287 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatSolveTranspose_MKL_CPARDISO" 293 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x){ 294 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr; 295 PetscErrorCode ierr; 296 297 PetscFunctionBegin; 298 #if defined(PETSC_USE_COMPLEX) 299 mat_mkl_cpardiso->iparm[12 - 1] = 1; 300 #else 301 mat_mkl_cpardiso->iparm[12 - 1] = 2; 302 #endif 303 ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr); 304 mat_mkl_cpardiso->iparm[12 - 1] = 0; 305 PetscFunctionReturn(0); 306 } 307 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatMatSolve_MKL_CPARDISO" 310 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X){ 311 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr; 312 PetscErrorCode ierr; 313 PetscScalar *barray, *xarray; 314 PetscBool flg; 315 316 PetscFunctionBegin; 317 318 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr); 319 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix"); 320 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr); 321 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix"); 322 323 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 324 325 if(mat_mkl_cpardiso->nrhs > 0){ 326 ierr = MatDenseGetArray(B,&barray); 327 ierr = MatDenseGetArray(X,&xarray); 328 329 /* solve phase */ 330 /*-------------*/ 331 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 332 cluster_sparse_solver ( 333 mat_mkl_cpardiso->pt, 334 &mat_mkl_cpardiso->maxfct, 335 &mat_mkl_cpardiso->mnum, 336 &mat_mkl_cpardiso->mtype, 337 &mat_mkl_cpardiso->phase, 338 &mat_mkl_cpardiso->n, 339 mat_mkl_cpardiso->a, 340 mat_mkl_cpardiso->ia, 341 mat_mkl_cpardiso->ja, 342 mat_mkl_cpardiso->perm, 343 &mat_mkl_cpardiso->nrhs, 344 mat_mkl_cpardiso->iparm, 345 &mat_mkl_cpardiso->msglvl, 346 (void*)barray, 347 (void*)xarray, 348 &mat_mkl_cpardiso->comm_mkl_cpardiso, 349 &mat_mkl_cpardiso->err); 350 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 351 } 352 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 353 PetscFunctionReturn(0); 354 355 } 356 357 /* 358 * LU Decomposition 359 */ 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatFactorNumeric_MKL_CPARDISO" 362 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info){ 363 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->spptr; 364 PetscErrorCode ierr; 365 366 /* numerical factorization phase */ 367 /*-------------------------------*/ 368 369 PetscFunctionBegin; 370 371 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 372 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a);CHKERRQ(ierr); 373 374 /* numerical factorization phase */ 375 /*-------------------------------*/ 376 mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION; 377 cluster_sparse_solver ( 378 mat_mkl_cpardiso->pt, 379 &mat_mkl_cpardiso->maxfct, 380 &mat_mkl_cpardiso->mnum, 381 &mat_mkl_cpardiso->mtype, 382 &mat_mkl_cpardiso->phase, 383 &mat_mkl_cpardiso->n, 384 mat_mkl_cpardiso->a, 385 mat_mkl_cpardiso->ia, 386 mat_mkl_cpardiso->ja, 387 mat_mkl_cpardiso->perm, 388 &mat_mkl_cpardiso->nrhs, 389 mat_mkl_cpardiso->iparm, 390 &mat_mkl_cpardiso->msglvl, 391 NULL, 392 NULL, 393 &mat_mkl_cpardiso->comm_mkl_cpardiso, 394 &mat_mkl_cpardiso->err); 395 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 396 397 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 398 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 399 PetscFunctionReturn(0); 400 } 401 402 /* Sets mkl_cpardiso options from the options database */ 403 #undef __FUNCT__ 404 #define __FUNCT__ "PetscSetMKL_CPARDISOFromOptions" 405 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A){ 406 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr; 407 PetscErrorCode ierr; 408 PetscInt icntl; 409 PetscBool flg; 410 int pt[IPARM_SIZE], threads; 411 412 PetscFunctionBegin; 413 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr); 414 ierr = PetscOptionsInt("-mat_mkl_cpardiso_65", 415 "Number of thrads to use", 416 "None", 417 threads, 418 &threads, 419 &flg);CHKERRQ(ierr); 420 if (flg) mkl_set_num_threads(threads); 421 422 ierr = PetscOptionsInt("-mat_mkl_cpardiso_66", 423 "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time", 424 "None", 425 mat_mkl_cpardiso->maxfct, 426 &icntl, 427 &flg);CHKERRQ(ierr); 428 if (flg) mat_mkl_cpardiso->maxfct = icntl; 429 430 ierr = PetscOptionsInt("-mat_mkl_cpardiso_67", 431 "Indicates the actual matrix for the solution phase", 432 "None", 433 mat_mkl_cpardiso->mnum, 434 &icntl, 435 &flg);CHKERRQ(ierr); 436 if (flg) mat_mkl_cpardiso->mnum = icntl; 437 438 ierr = PetscOptionsInt("-mat_mkl_cpardiso_68", 439 "Message level information", 440 "None", 441 mat_mkl_cpardiso->msglvl, 442 &icntl, 443 &flg);CHKERRQ(ierr); 444 if (flg) mat_mkl_cpardiso->msglvl = icntl; 445 446 ierr = PetscOptionsInt("-mat_mkl_cpardiso_69", 447 "Defines the matrix type", 448 "None", 449 mat_mkl_cpardiso->mtype, 450 &icntl, 451 &flg);CHKERRQ(ierr); 452 if(flg){ 453 mat_mkl_cpardiso->mtype = icntl; 454 #if defined(PETSC_USE_REAL_SINGLE) 455 mat_mkl_cpardiso->iparm[27] = 1; 456 #else 457 mat_mkl_cpardiso->iparm[27] = 0; 458 #endif 459 mat_mkl_cpardiso->iparm[34] = 1; 460 } 461 ierr = PetscOptionsInt("-mat_mkl_cpardiso_1", 462 "Use default values", 463 "None", 464 mat_mkl_cpardiso->iparm[0], 465 &icntl, 466 &flg);CHKERRQ(ierr); 467 468 if(flg && icntl != 0){ 469 ierr = PetscOptionsInt("-mat_mkl_cpardiso_2", 470 "Fill-in reducing ordering for the input matrix", 471 "None", 472 mat_mkl_cpardiso->iparm[1], 473 &icntl, 474 &flg);CHKERRQ(ierr); 475 if (flg) mat_mkl_cpardiso->iparm[1] = icntl; 476 477 ierr = PetscOptionsInt("-mat_mkl_cpardiso_4", 478 "Preconditioned CGS/CG", 479 "None", 480 mat_mkl_cpardiso->iparm[3], 481 &icntl, 482 &flg);CHKERRQ(ierr); 483 if (flg) mat_mkl_cpardiso->iparm[3] = icntl; 484 485 ierr = PetscOptionsInt("-mat_mkl_cpardiso_5", 486 "User permutation", 487 "None", 488 mat_mkl_cpardiso->iparm[4], 489 &icntl, 490 &flg);CHKERRQ(ierr); 491 if (flg) mat_mkl_cpardiso->iparm[4] = icntl; 492 493 ierr = PetscOptionsInt("-mat_mkl_cpardiso_6", 494 "Write solution on x", 495 "None", 496 mat_mkl_cpardiso->iparm[5], 497 &icntl, 498 &flg);CHKERRQ(ierr); 499 if (flg) mat_mkl_cpardiso->iparm[5] = icntl; 500 501 ierr = PetscOptionsInt("-mat_mkl_cpardiso_8", 502 "Iterative refinement step", 503 "None", 504 mat_mkl_cpardiso->iparm[7], 505 &icntl, 506 &flg);CHKERRQ(ierr); 507 if (flg) mat_mkl_cpardiso->iparm[7] = icntl; 508 509 ierr = PetscOptionsInt("-mat_mkl_cpardiso_10", 510 "Pivoting perturbation", 511 "None", 512 mat_mkl_cpardiso->iparm[9], 513 &icntl, 514 &flg);CHKERRQ(ierr); 515 if (flg) mat_mkl_cpardiso->iparm[9] = icntl; 516 517 ierr = PetscOptionsInt("-mat_mkl_cpardiso_11", 518 "Scaling vectors", 519 "None", 520 mat_mkl_cpardiso->iparm[10], 521 &icntl, 522 &flg);CHKERRQ(ierr); 523 if (flg) mat_mkl_cpardiso->iparm[10] = icntl; 524 525 ierr = PetscOptionsInt("-mat_mkl_cpardiso_12", 526 "Solve with transposed or conjugate transposed matrix A", 527 "None", 528 mat_mkl_cpardiso->iparm[11], 529 &icntl, 530 &flg);CHKERRQ(ierr); 531 if (flg) mat_mkl_cpardiso->iparm[11] = icntl; 532 533 ierr = PetscOptionsInt("-mat_mkl_cpardiso_13", 534 "Improved accuracy using (non-) symmetric weighted matching", 535 "None", 536 mat_mkl_cpardiso->iparm[12], 537 &icntl, 538 &flg);CHKERRQ(ierr); 539 if (flg) mat_mkl_cpardiso->iparm[12] = icntl; 540 541 ierr = PetscOptionsInt("-mat_mkl_cpardiso_18", 542 "Numbers of non-zero elements", 543 "None", 544 mat_mkl_cpardiso->iparm[17], 545 &icntl, 546 &flg);CHKERRQ(ierr); 547 if (flg) mat_mkl_cpardiso->iparm[17] = icntl; 548 549 ierr = PetscOptionsInt("-mat_mkl_cpardiso_19", 550 "Report number of floating point operations", 551 "None", 552 mat_mkl_cpardiso->iparm[18], 553 &icntl, 554 &flg);CHKERRQ(ierr); 555 if (flg) mat_mkl_cpardiso->iparm[18] = icntl; 556 557 ierr = PetscOptionsInt("-mat_mkl_cpardiso_21", 558 "Pivoting for symmetric indefinite matrices", 559 "None", 560 mat_mkl_cpardiso->iparm[20], 561 &icntl, 562 &flg);CHKERRQ(ierr); 563 if (flg) mat_mkl_cpardiso->iparm[20] = icntl; 564 565 ierr = PetscOptionsInt("-mat_mkl_cpardiso_24", 566 "Parallel factorization control", 567 "None", 568 mat_mkl_cpardiso->iparm[23], 569 &icntl, 570 &flg);CHKERRQ(ierr); 571 if (flg) mat_mkl_cpardiso->iparm[23] = icntl; 572 573 ierr = PetscOptionsInt("-mat_mkl_cpardiso_25", 574 "Parallel forward/backward solve control", 575 "None", 576 mat_mkl_cpardiso->iparm[24], 577 &icntl, 578 &flg);CHKERRQ(ierr); 579 if (flg) mat_mkl_cpardiso->iparm[24] = icntl; 580 581 ierr = PetscOptionsInt("-mat_mkl_cpardiso_27", 582 "Matrix checker", 583 "None", 584 mat_mkl_cpardiso->iparm[26], 585 &icntl, 586 &flg);CHKERRQ(ierr); 587 if (flg) mat_mkl_cpardiso->iparm[26] = icntl; 588 589 ierr = PetscOptionsInt("-mat_mkl_cpardiso_31", 590 "Partial solve and computing selected components of the solution vectors", 591 "None", 592 mat_mkl_cpardiso->iparm[30], 593 &icntl, 594 &flg);CHKERRQ(ierr); 595 if (flg) mat_mkl_cpardiso->iparm[30] = icntl; 596 597 ierr = PetscOptionsInt("-mat_mkl_cpardiso_34", 598 "Optimal number of threads for conditional numerical reproducibility (CNR) mode", 599 "None", 600 mat_mkl_cpardiso->iparm[33], 601 &icntl, 602 &flg);CHKERRQ(ierr); 603 if (flg) mat_mkl_cpardiso->iparm[33] = icntl; 604 605 ierr = PetscOptionsInt("-mat_mkl_cpardiso_60", 606 "Intel MKL_CPARDISO mode", 607 "None", 608 mat_mkl_cpardiso->iparm[59], 609 &icntl, 610 &flg);CHKERRQ(ierr); 611 if (flg) mat_mkl_cpardiso->iparm[59] = icntl; 612 } 613 614 PetscOptionsEnd(); 615 PetscFunctionReturn(0); 616 } 617 618 #undef __FUNCT__ 619 #define __FUNCT__ "PetscInitialize_MKL_CPARDISO" 620 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso){ 621 PetscErrorCode ierr; 622 PetscInt i; 623 int SizeComm = -1; 624 625 PetscFunctionBegin; 626 627 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 628 ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &SizeComm);CHKERRQ(ierr); 629 630 mat_mkl_cpardiso->CleanUp = PETSC_FALSE; 631 mat_mkl_cpardiso->maxfct = 1; 632 mat_mkl_cpardiso->mnum = 1; 633 mat_mkl_cpardiso->n = A->rmap->N; 634 mat_mkl_cpardiso->msglvl = 0; 635 mat_mkl_cpardiso->nrhs = 1; 636 mat_mkl_cpardiso->err = 0; 637 mat_mkl_cpardiso->phase = -1; 638 #if defined(PETSC_USE_COMPLEX) 639 mat_mkl_cpardiso->mtype = 13; 640 #else 641 mat_mkl_cpardiso->mtype = 11; 642 #endif 643 644 #if defined(PETSC_USE_REAL_SINGLE) 645 mat_mkl_cpardiso->iparm[27] = 1; 646 #else 647 mat_mkl_cpardiso->iparm[27] = 0; 648 #endif 649 650 mat_mkl_cpardiso->iparm[34] = 1; /* C style */ 651 652 mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 653 mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */ 654 mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */ 655 mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */ 656 mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 657 mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 658 mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 659 mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 660 mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 661 mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */ 662 663 mat_mkl_cpardiso->iparm[39] = 0; 664 if (SizeComm > 1) { 665 mat_mkl_cpardiso->iparm[39] = 2; 666 mat_mkl_cpardiso->iparm[40] = A->rmap->rstart; 667 mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1; 668 } 669 670 mat_mkl_cpardiso->perm = 0; 671 672 PetscFunctionReturn(0); 673 } 674 675 /* 676 * Symbolic decomposition. Mkl_Pardiso analysis phase. 677 */ 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_CPARDISO" 680 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info){ 681 682 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr; 683 PetscErrorCode ierr; 684 685 PetscFunctionBegin; 686 687 mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 688 689 /* Set MKL_CPARDISO options from the options database */ 690 ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr); 691 692 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_INITIAL_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a);CHKERRQ(ierr); 693 694 mat_mkl_cpardiso->n = A->rmap->N; 695 696 /* analysis phase */ 697 /*----------------*/ 698 mat_mkl_cpardiso->phase = JOB_ANALYSIS; 699 700 cluster_sparse_solver ( 701 mat_mkl_cpardiso->pt, 702 &mat_mkl_cpardiso->maxfct, 703 &mat_mkl_cpardiso->mnum, 704 &mat_mkl_cpardiso->mtype, 705 &mat_mkl_cpardiso->phase, 706 &mat_mkl_cpardiso->n, 707 mat_mkl_cpardiso->a, 708 mat_mkl_cpardiso->ia, 709 mat_mkl_cpardiso->ja, 710 mat_mkl_cpardiso->perm, 711 &mat_mkl_cpardiso->nrhs, 712 mat_mkl_cpardiso->iparm, 713 &mat_mkl_cpardiso->msglvl, 714 NULL, 715 NULL, 716 &mat_mkl_cpardiso->comm_mkl_cpardiso, 717 &mat_mkl_cpardiso->err); 718 719 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 720 721 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 722 F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO; 723 F->ops->solve = MatSolve_MKL_CPARDISO; 724 F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO; 725 F->ops->matsolve = MatMatSolve_MKL_CPARDISO; 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatView_MKL_CPARDISO" 731 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer){ 732 PetscErrorCode ierr; 733 PetscBool iascii; 734 PetscViewerFormat format; 735 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr; 736 PetscInt i; 737 738 PetscFunctionBegin; 739 /* check if matrix is mkl_cpardiso type */ 740 if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0); 741 742 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 743 if (iascii) { 744 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 745 if (format == PETSC_VIEWER_ASCII_INFO) { 746 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr); 747 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr); 748 for(i = 1; i <= 64; i++){ 749 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr); 750 } 751 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr); 752 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr); 753 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr); 754 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr); 755 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 756 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr); 757 } 758 } 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatGetInfo_MKL_CPARDISO" 764 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info){ 765 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->spptr; 766 767 PetscFunctionBegin; 768 info->block_size = 1.0; 769 info->nz_allocated = mat_mkl_cpardiso->nz + 0.0; 770 info->nz_unneeded = 0.0; 771 info->assemblies = 0.0; 772 info->mallocs = 0.0; 773 info->memory = 0.0; 774 info->fill_ratio_given = 0; 775 info->fill_ratio_needed = 0; 776 info->factor_mallocs = 0; 777 PetscFunctionReturn(0); 778 } 779 780 #undef __FUNCT__ 781 #define __FUNCT__ "MatMkl_CPardisoSetCntl_MKL_CPARDISO" 782 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival){ 783 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->spptr; 784 PetscFunctionBegin; 785 if(icntl <= 64){ 786 mat_mkl_cpardiso->iparm[icntl - 1] = ival; 787 } else { 788 if(icntl == 65) 789 mkl_set_num_threads((int)ival); 790 else if(icntl == 66) 791 mat_mkl_cpardiso->maxfct = ival; 792 else if(icntl == 67) 793 mat_mkl_cpardiso->mnum = ival; 794 else if(icntl == 68) 795 mat_mkl_cpardiso->msglvl = ival; 796 else if(icntl == 69){ 797 int pt[IPARM_SIZE]; 798 mat_mkl_cpardiso->mtype = ival; 799 #if defined(PETSC_USE_REAL_SINGLE) 800 mat_mkl_cpardiso->iparm[27] = 1; 801 #else 802 mat_mkl_cpardiso->iparm[27] = 0; 803 #endif 804 mat_mkl_cpardiso->iparm[34] = 1; 805 } 806 } 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatMkl_CPardisoSetCntl" 812 /*@ 813 MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters 814 815 Logically Collective on Mat 816 817 Input Parameters: 818 + F - the factored matrix obtained by calling MatGetFactor() 819 . icntl - index of Mkl_Pardiso parameter 820 - ival - value of Mkl_Pardiso parameter 821 822 Options Database: 823 . -mat_mkl_cpardiso_<icntl> <ival> 824 825 Level: beginner 826 827 References: Mkl_Pardiso Users' Guide 828 829 .seealso: MatGetFactor() 830 @*/ 831 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 832 { 833 PetscErrorCode ierr; 834 835 PetscFunctionBegin; 836 ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 837 PetscFunctionReturn(0); 838 } 839 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_cpardiso" 842 static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type){ 843 PetscFunctionBegin; 844 *type = MATSOLVERMKL_CPARDISO; 845 PetscFunctionReturn(0); 846 } 847 848 /* MatGetFactor for MPI AIJ matrices */ 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatGetFactor_mpiaij_mkl_cpardiso" 851 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F){ 852 Mat B; 853 PetscErrorCode ierr; 854 Mat_MKL_CPARDISO *mat_mkl_cpardiso; 855 PetscBool isSeqAIJ; 856 857 PetscFunctionBegin; 858 /* Create the factorization matrix */ 859 860 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 861 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 862 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 863 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 864 865 ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr); 866 867 if (isSeqAIJ) { 868 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 869 mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO; 870 } else { 871 mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO; 872 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 873 } 874 875 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO; 876 B->ops->destroy = MatDestroy_MKL_CPARDISO; 877 878 B->ops->view = MatView_MKL_CPARDISO; 879 B->ops->getinfo = MatGetInfo_MKL_CPARDISO; 880 881 B->factortype = ftype; 882 B->assembled = PETSC_TRUE; /* required by -ksp_view */ 883 884 B->spptr = mat_mkl_cpardiso; 885 886 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);CHKERRQ(ierr); 887 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr); 888 ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr); 889 890 *F = B; 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatSolverPackageRegister_MKL_CPardiso" 896 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void) 897 { 898 PetscErrorCode ierr; 899 PetscFunctionBegin; 900 ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 901 ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904