1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <math.h> 11 #include <mkl.h> 12 #include <mkl_cluster_sparse_solver.h> 13 14 /* 15 * Possible mkl_cpardiso phases that controls the execution of the solver. 16 * For more information check mkl_cpardiso manual. 17 */ 18 #define JOB_ANALYSIS 11 19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 21 #define JOB_NUMERICAL_FACTORIZATION 22 22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 27 #define JOB_RELEASE_OF_LU_MEMORY 0 28 #define JOB_RELEASE_OF_ALL_MEMORY -1 29 30 #define IPARM_SIZE 64 31 #define INT_TYPE MKL_INT 32 33 static const char *Err_MSG_CPardiso(int errNo){ 34 switch (errNo) { 35 case -1: 36 return "input inconsistent"; break; 37 case -2: 38 return "not enough memory"; break; 39 case -3: 40 return "reordering problem"; break; 41 case -4: 42 return "zero pivot, numerical factorization or iterative refinement problem"; break; 43 case -5: 44 return "unclassified (internal) error"; break; 45 case -6: 46 return "preordering failed (matrix types 11, 13 only)"; break; 47 case -7: 48 return "diagonal matrix problem"; break; 49 case -8: 50 return "32-bit integer overflow problem"; break; 51 case -9: 52 return "not enough memory for OOC"; break; 53 case -10: 54 return "problems with opening OOC temporary files"; break; 55 case -11: 56 return "read/write problems with the OOC data file"; break; 57 default : 58 return "unknown error"; 59 } 60 } 61 62 /* 63 * Internal data structure. 64 * For more information check mkl_cpardiso manual. 65 */ 66 67 typedef struct { 68 69 /* Configuration vector */ 70 INT_TYPE iparm[IPARM_SIZE]; 71 72 /* 73 * Internal mkl_cpardiso memory location. 74 * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak. 75 */ 76 void *pt[IPARM_SIZE]; 77 78 MPI_Comm comm_mkl_cpardiso; 79 80 /* Basic mkl_cpardiso info*/ 81 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 82 83 /* Matrix structure */ 84 PetscScalar *a; 85 86 INT_TYPE *ia, *ja; 87 88 /* Number of non-zero elements */ 89 INT_TYPE nz; 90 91 /* Row permutaton vector*/ 92 INT_TYPE *perm; 93 94 /* Define is matrix preserve sparce structure. */ 95 MatStructure matstruc; 96 97 PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**); 98 99 /* True if mkl_cpardiso function have been used. */ 100 PetscBool CleanUp; 101 } Mat_MKL_CPARDISO; 102 103 /* 104 * Copy the elements of matrix A. 105 * Input: 106 * - Mat A: MATSEQAIJ matrix 107 * - int shift: matrix index. 108 * - 0 for c representation 109 * - 1 for fortran representation 110 * - MatReuse reuse: 111 * - MAT_INITIAL_MATRIX: Create a new aij representation 112 * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values 113 * Output: 114 * - int *nnz: Number of nonzero-elements. 115 * - int **r pointer to i index 116 * - int **c pointer to j elements 117 * - MATRIXTYPE **v: Non-zero elements 118 */ 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatCopy_seqaij_seqaij_MKL_CPARDISO" 121 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 122 { 123 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 124 125 PetscFunctionBegin; 126 *v=aa->a; 127 if (reuse == MAT_INITIAL_MATRIX) { 128 *r = (INT_TYPE*)aa->i; 129 *c = (INT_TYPE*)aa->j; 130 *nnz = aa->nz; 131 } 132 PetscFunctionReturn(0); 133 } 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO" 137 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 138 { 139 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 140 PetscErrorCode ierr; 141 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 142 PetscInt *row,*col; 143 const PetscScalar *av, *bv,*v1,*v2; 144 PetscScalar *val; 145 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 146 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 147 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 148 PetscInt nn, colA_start,jB,jcol; 149 150 PetscFunctionBegin; 151 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 152 av=aa->a; bv=bb->a; 153 154 garray = mat->garray; 155 156 if (reuse == MAT_INITIAL_MATRIX) { 157 nz = aa->nz + bb->nz; 158 *nnz = nz; 159 ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr); 160 col = row + m + 1; 161 val = (PetscScalar*)(col + nz); 162 *r = row; *c = col; *v = val; 163 row[0] = 0; 164 } else { 165 row = *r; col = *c; val = *v; 166 } 167 168 nz = 0; 169 for (i=0; i<m; i++) { 170 row[i] = nz; 171 countA = ai[i+1] - ai[i]; 172 countB = bi[i+1] - bi[i]; 173 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 174 bjj = bj + bi[i]; 175 176 /* B part, smaller col index */ 177 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 178 jB = 0; 179 for (j=0; j<countB; j++) { 180 jcol = garray[bjj[j]]; 181 if (jcol > colA_start) { 182 jB = j; 183 break; 184 } 185 col[nz] = jcol; 186 val[nz++] = *bv++; 187 if (j==countB-1) jB = countB; 188 } 189 190 /* A part */ 191 for (j=0; j<countA; j++) { 192 col[nz] = rstart + ajj[j]; 193 val[nz++] = *av++; 194 } 195 196 /* B part, larger col index */ 197 for (j=jB; j<countB; j++) { 198 col[nz] = garray[bjj[j]]; 199 val[nz++] = *bv++; 200 } 201 } 202 row[m] = nz; 203 204 PetscFunctionReturn(0); 205 } 206 207 /* 208 * Free memory for Mat_MKL_CPARDISO structure and pointers to objects. 209 */ 210 #undef __FUNCT__ 211 #define __FUNCT__ "MatDestroy_MKL_CPARDISO" 212 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A) 213 { 214 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr; 215 PetscErrorCode ierr; 216 PetscBool flg; 217 218 PetscFunctionBegin; 219 /* Terminate instance, deallocate memories */ 220 if (mat_mkl_cpardiso->CleanUp) { 221 mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 222 223 cluster_sparse_solver ( 224 mat_mkl_cpardiso->pt, 225 &mat_mkl_cpardiso->maxfct, 226 &mat_mkl_cpardiso->mnum, 227 &mat_mkl_cpardiso->mtype, 228 &mat_mkl_cpardiso->phase, 229 &mat_mkl_cpardiso->n, 230 NULL, 231 NULL, 232 NULL, 233 mat_mkl_cpardiso->perm, 234 &mat_mkl_cpardiso->nrhs, 235 mat_mkl_cpardiso->iparm, 236 &mat_mkl_cpardiso->msglvl, 237 NULL, 238 NULL, 239 &mat_mkl_cpardiso->comm_mkl_cpardiso, 240 &mat_mkl_cpardiso->err); 241 } 242 243 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&flg);CHKERRQ(ierr); 244 if (flg) { 245 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 246 } else { 247 ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr); 248 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 249 } 250 ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 251 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 252 253 /* clear composed functions */ 254 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr); 255 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr); 256 PetscFunctionReturn(0); 257 } 258 259 /* 260 * Computes Ax = b 261 */ 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatSolve_MKL_CPARDISO" 264 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x) 265 { 266 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr; 267 PetscErrorCode ierr; 268 PetscScalar *xarray; 269 const PetscScalar *barray; 270 271 PetscFunctionBegin; 272 mat_mkl_cpardiso->nrhs = 1; 273 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 274 ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 275 276 /* solve phase */ 277 /*-------------*/ 278 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 279 cluster_sparse_solver ( 280 mat_mkl_cpardiso->pt, 281 &mat_mkl_cpardiso->maxfct, 282 &mat_mkl_cpardiso->mnum, 283 &mat_mkl_cpardiso->mtype, 284 &mat_mkl_cpardiso->phase, 285 &mat_mkl_cpardiso->n, 286 mat_mkl_cpardiso->a, 287 mat_mkl_cpardiso->ia, 288 mat_mkl_cpardiso->ja, 289 mat_mkl_cpardiso->perm, 290 &mat_mkl_cpardiso->nrhs, 291 mat_mkl_cpardiso->iparm, 292 &mat_mkl_cpardiso->msglvl, 293 (void*)barray, 294 (void*)xarray, 295 &mat_mkl_cpardiso->comm_mkl_cpardiso, 296 &mat_mkl_cpardiso->err); 297 298 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 299 300 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 301 ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 302 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSolveTranspose_MKL_CPARDISO" 308 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x) 309 { 310 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr; 311 PetscErrorCode ierr; 312 313 PetscFunctionBegin; 314 #if defined(PETSC_USE_COMPLEX) 315 mat_mkl_cpardiso->iparm[12 - 1] = 1; 316 #else 317 mat_mkl_cpardiso->iparm[12 - 1] = 2; 318 #endif 319 ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr); 320 mat_mkl_cpardiso->iparm[12 - 1] = 0; 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatMatSolve_MKL_CPARDISO" 326 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X) 327 { 328 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr; 329 PetscErrorCode ierr; 330 PetscScalar *barray, *xarray; 331 PetscBool flg; 332 333 PetscFunctionBegin; 334 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr); 335 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix"); 336 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr); 337 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix"); 338 339 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 340 341 if(mat_mkl_cpardiso->nrhs > 0){ 342 ierr = MatDenseGetArray(B,&barray); 343 ierr = MatDenseGetArray(X,&xarray); 344 345 /* solve phase */ 346 /*-------------*/ 347 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 348 cluster_sparse_solver ( 349 mat_mkl_cpardiso->pt, 350 &mat_mkl_cpardiso->maxfct, 351 &mat_mkl_cpardiso->mnum, 352 &mat_mkl_cpardiso->mtype, 353 &mat_mkl_cpardiso->phase, 354 &mat_mkl_cpardiso->n, 355 mat_mkl_cpardiso->a, 356 mat_mkl_cpardiso->ia, 357 mat_mkl_cpardiso->ja, 358 mat_mkl_cpardiso->perm, 359 &mat_mkl_cpardiso->nrhs, 360 mat_mkl_cpardiso->iparm, 361 &mat_mkl_cpardiso->msglvl, 362 (void*)barray, 363 (void*)xarray, 364 &mat_mkl_cpardiso->comm_mkl_cpardiso, 365 &mat_mkl_cpardiso->err); 366 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 367 } 368 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 369 PetscFunctionReturn(0); 370 371 } 372 373 /* 374 * LU Decomposition 375 */ 376 #undef __FUNCT__ 377 #define __FUNCT__ "MatFactorNumeric_MKL_CPARDISO" 378 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info) 379 { 380 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->spptr; 381 PetscErrorCode ierr; 382 383 /* numerical factorization phase */ 384 /*-------------------------------*/ 385 386 PetscFunctionBegin; 387 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 388 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 389 390 /* numerical factorization phase */ 391 /*-------------------------------*/ 392 mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION; 393 cluster_sparse_solver ( 394 mat_mkl_cpardiso->pt, 395 &mat_mkl_cpardiso->maxfct, 396 &mat_mkl_cpardiso->mnum, 397 &mat_mkl_cpardiso->mtype, 398 &mat_mkl_cpardiso->phase, 399 &mat_mkl_cpardiso->n, 400 mat_mkl_cpardiso->a, 401 mat_mkl_cpardiso->ia, 402 mat_mkl_cpardiso->ja, 403 mat_mkl_cpardiso->perm, 404 &mat_mkl_cpardiso->nrhs, 405 mat_mkl_cpardiso->iparm, 406 &mat_mkl_cpardiso->msglvl, 407 NULL, 408 NULL, 409 &mat_mkl_cpardiso->comm_mkl_cpardiso, 410 &mat_mkl_cpardiso->err); 411 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 412 413 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 414 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 415 PetscFunctionReturn(0); 416 } 417 418 /* Sets mkl_cpardiso options from the options database */ 419 #undef __FUNCT__ 420 #define __FUNCT__ "PetscSetMKL_CPARDISOFromOptions" 421 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A) 422 { 423 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr; 424 PetscErrorCode ierr; 425 PetscInt icntl; 426 PetscBool flg; 427 int pt[IPARM_SIZE], threads; 428 429 PetscFunctionBegin; 430 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr); 431 ierr = PetscOptionsInt("-mat_mkl_cpardiso_65", 432 "Number of threads to use", 433 "None", 434 threads, 435 &threads, 436 &flg);CHKERRQ(ierr); 437 if (flg) mkl_set_num_threads(threads); 438 439 ierr = PetscOptionsInt("-mat_mkl_cpardiso_66", 440 "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time", 441 "None", 442 mat_mkl_cpardiso->maxfct, 443 &icntl, 444 &flg);CHKERRQ(ierr); 445 if (flg) mat_mkl_cpardiso->maxfct = icntl; 446 447 ierr = PetscOptionsInt("-mat_mkl_cpardiso_67", 448 "Indicates the actual matrix for the solution phase", 449 "None", 450 mat_mkl_cpardiso->mnum, 451 &icntl, 452 &flg);CHKERRQ(ierr); 453 if (flg) mat_mkl_cpardiso->mnum = icntl; 454 455 ierr = PetscOptionsInt("-mat_mkl_cpardiso_68", 456 "Message level information", 457 "None", 458 mat_mkl_cpardiso->msglvl, 459 &icntl, 460 &flg);CHKERRQ(ierr); 461 if (flg) mat_mkl_cpardiso->msglvl = icntl; 462 463 ierr = PetscOptionsInt("-mat_mkl_cpardiso_69", 464 "Defines the matrix type", 465 "None", 466 mat_mkl_cpardiso->mtype, 467 &icntl, 468 &flg);CHKERRQ(ierr); 469 if(flg){ 470 mat_mkl_cpardiso->mtype = icntl; 471 #if defined(PETSC_USE_REAL_SINGLE) 472 mat_mkl_cpardiso->iparm[27] = 1; 473 #else 474 mat_mkl_cpardiso->iparm[27] = 0; 475 #endif 476 mat_mkl_cpardiso->iparm[34] = 1; 477 } 478 ierr = PetscOptionsInt("-mat_mkl_cpardiso_1", 479 "Use default values", 480 "None", 481 mat_mkl_cpardiso->iparm[0], 482 &icntl, 483 &flg);CHKERRQ(ierr); 484 485 if(flg && icntl != 0){ 486 ierr = PetscOptionsInt("-mat_mkl_cpardiso_2", 487 "Fill-in reducing ordering for the input matrix", 488 "None", 489 mat_mkl_cpardiso->iparm[1], 490 &icntl, 491 &flg);CHKERRQ(ierr); 492 if (flg) mat_mkl_cpardiso->iparm[1] = icntl; 493 494 ierr = PetscOptionsInt("-mat_mkl_cpardiso_4", 495 "Preconditioned CGS/CG", 496 "None", 497 mat_mkl_cpardiso->iparm[3], 498 &icntl, 499 &flg);CHKERRQ(ierr); 500 if (flg) mat_mkl_cpardiso->iparm[3] = icntl; 501 502 ierr = PetscOptionsInt("-mat_mkl_cpardiso_5", 503 "User permutation", 504 "None", 505 mat_mkl_cpardiso->iparm[4], 506 &icntl, 507 &flg);CHKERRQ(ierr); 508 if (flg) mat_mkl_cpardiso->iparm[4] = icntl; 509 510 ierr = PetscOptionsInt("-mat_mkl_cpardiso_6", 511 "Write solution on x", 512 "None", 513 mat_mkl_cpardiso->iparm[5], 514 &icntl, 515 &flg);CHKERRQ(ierr); 516 if (flg) mat_mkl_cpardiso->iparm[5] = icntl; 517 518 ierr = PetscOptionsInt("-mat_mkl_cpardiso_8", 519 "Iterative refinement step", 520 "None", 521 mat_mkl_cpardiso->iparm[7], 522 &icntl, 523 &flg);CHKERRQ(ierr); 524 if (flg) mat_mkl_cpardiso->iparm[7] = icntl; 525 526 ierr = PetscOptionsInt("-mat_mkl_cpardiso_10", 527 "Pivoting perturbation", 528 "None", 529 mat_mkl_cpardiso->iparm[9], 530 &icntl, 531 &flg);CHKERRQ(ierr); 532 if (flg) mat_mkl_cpardiso->iparm[9] = icntl; 533 534 ierr = PetscOptionsInt("-mat_mkl_cpardiso_11", 535 "Scaling vectors", 536 "None", 537 mat_mkl_cpardiso->iparm[10], 538 &icntl, 539 &flg);CHKERRQ(ierr); 540 if (flg) mat_mkl_cpardiso->iparm[10] = icntl; 541 542 ierr = PetscOptionsInt("-mat_mkl_cpardiso_12", 543 "Solve with transposed or conjugate transposed matrix A", 544 "None", 545 mat_mkl_cpardiso->iparm[11], 546 &icntl, 547 &flg);CHKERRQ(ierr); 548 if (flg) mat_mkl_cpardiso->iparm[11] = icntl; 549 550 ierr = PetscOptionsInt("-mat_mkl_cpardiso_13", 551 "Improved accuracy using (non-) symmetric weighted matching", 552 "None", 553 mat_mkl_cpardiso->iparm[12], 554 &icntl, 555 &flg);CHKERRQ(ierr); 556 if (flg) mat_mkl_cpardiso->iparm[12] = icntl; 557 558 ierr = PetscOptionsInt("-mat_mkl_cpardiso_18", 559 "Numbers of non-zero elements", 560 "None", 561 mat_mkl_cpardiso->iparm[17], 562 &icntl, 563 &flg);CHKERRQ(ierr); 564 if (flg) mat_mkl_cpardiso->iparm[17] = icntl; 565 566 ierr = PetscOptionsInt("-mat_mkl_cpardiso_19", 567 "Report number of floating point operations", 568 "None", 569 mat_mkl_cpardiso->iparm[18], 570 &icntl, 571 &flg);CHKERRQ(ierr); 572 if (flg) mat_mkl_cpardiso->iparm[18] = icntl; 573 574 ierr = PetscOptionsInt("-mat_mkl_cpardiso_21", 575 "Pivoting for symmetric indefinite matrices", 576 "None", 577 mat_mkl_cpardiso->iparm[20], 578 &icntl, 579 &flg);CHKERRQ(ierr); 580 if (flg) mat_mkl_cpardiso->iparm[20] = icntl; 581 582 ierr = PetscOptionsInt("-mat_mkl_cpardiso_24", 583 "Parallel factorization control", 584 "None", 585 mat_mkl_cpardiso->iparm[23], 586 &icntl, 587 &flg);CHKERRQ(ierr); 588 if (flg) mat_mkl_cpardiso->iparm[23] = icntl; 589 590 ierr = PetscOptionsInt("-mat_mkl_cpardiso_25", 591 "Parallel forward/backward solve control", 592 "None", 593 mat_mkl_cpardiso->iparm[24], 594 &icntl, 595 &flg);CHKERRQ(ierr); 596 if (flg) mat_mkl_cpardiso->iparm[24] = icntl; 597 598 ierr = PetscOptionsInt("-mat_mkl_cpardiso_27", 599 "Matrix checker", 600 "None", 601 mat_mkl_cpardiso->iparm[26], 602 &icntl, 603 &flg);CHKERRQ(ierr); 604 if (flg) mat_mkl_cpardiso->iparm[26] = icntl; 605 606 ierr = PetscOptionsInt("-mat_mkl_cpardiso_31", 607 "Partial solve and computing selected components of the solution vectors", 608 "None", 609 mat_mkl_cpardiso->iparm[30], 610 &icntl, 611 &flg);CHKERRQ(ierr); 612 if (flg) mat_mkl_cpardiso->iparm[30] = icntl; 613 614 ierr = PetscOptionsInt("-mat_mkl_cpardiso_34", 615 "Optimal number of threads for conditional numerical reproducibility (CNR) mode", 616 "None", 617 mat_mkl_cpardiso->iparm[33], 618 &icntl, 619 &flg);CHKERRQ(ierr); 620 if (flg) mat_mkl_cpardiso->iparm[33] = icntl; 621 622 ierr = PetscOptionsInt("-mat_mkl_cpardiso_60", 623 "Intel MKL_CPARDISO mode", 624 "None", 625 mat_mkl_cpardiso->iparm[59], 626 &icntl, 627 &flg);CHKERRQ(ierr); 628 if (flg) mat_mkl_cpardiso->iparm[59] = icntl; 629 } 630 631 PetscOptionsEnd(); 632 PetscFunctionReturn(0); 633 } 634 635 #undef __FUNCT__ 636 #define __FUNCT__ "PetscInitialize_MKL_CPARDISO" 637 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso) 638 { 639 PetscErrorCode ierr; 640 PetscInt i; 641 PetscMPIInt size; 642 643 PetscFunctionBegin; 644 645 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 646 ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr); 647 648 mat_mkl_cpardiso->CleanUp = PETSC_FALSE; 649 mat_mkl_cpardiso->maxfct = 1; 650 mat_mkl_cpardiso->mnum = 1; 651 mat_mkl_cpardiso->n = A->rmap->N; 652 mat_mkl_cpardiso->msglvl = 0; 653 mat_mkl_cpardiso->nrhs = 1; 654 mat_mkl_cpardiso->err = 0; 655 mat_mkl_cpardiso->phase = -1; 656 #if defined(PETSC_USE_COMPLEX) 657 mat_mkl_cpardiso->mtype = 13; 658 #else 659 mat_mkl_cpardiso->mtype = 11; 660 #endif 661 662 #if defined(PETSC_USE_REAL_SINGLE) 663 mat_mkl_cpardiso->iparm[27] = 1; 664 #else 665 mat_mkl_cpardiso->iparm[27] = 0; 666 #endif 667 668 mat_mkl_cpardiso->iparm[34] = 1; /* C style */ 669 670 mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 671 mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */ 672 mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */ 673 mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */ 674 mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 675 mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 676 mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 677 mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 678 mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 679 mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */ 680 681 mat_mkl_cpardiso->iparm[39] = 0; 682 if (size > 1) { 683 mat_mkl_cpardiso->iparm[39] = 2; 684 mat_mkl_cpardiso->iparm[40] = A->rmap->rstart; 685 mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1; 686 } 687 mat_mkl_cpardiso->perm = 0; 688 PetscFunctionReturn(0); 689 } 690 691 /* 692 * Symbolic decomposition. Mkl_Pardiso analysis phase. 693 */ 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatLUFactorSymbolic_AIJMKL_CPARDISO" 696 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 697 { 698 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 703 704 /* Set MKL_CPARDISO options from the options database */ 705 ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr); 706 707 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 708 709 mat_mkl_cpardiso->n = A->rmap->N; 710 711 /* analysis phase */ 712 /*----------------*/ 713 mat_mkl_cpardiso->phase = JOB_ANALYSIS; 714 715 cluster_sparse_solver ( 716 mat_mkl_cpardiso->pt, 717 &mat_mkl_cpardiso->maxfct, 718 &mat_mkl_cpardiso->mnum, 719 &mat_mkl_cpardiso->mtype, 720 &mat_mkl_cpardiso->phase, 721 &mat_mkl_cpardiso->n, 722 mat_mkl_cpardiso->a, 723 mat_mkl_cpardiso->ia, 724 mat_mkl_cpardiso->ja, 725 mat_mkl_cpardiso->perm, 726 &mat_mkl_cpardiso->nrhs, 727 mat_mkl_cpardiso->iparm, 728 &mat_mkl_cpardiso->msglvl, 729 NULL, 730 NULL, 731 &mat_mkl_cpardiso->comm_mkl_cpardiso, 732 &mat_mkl_cpardiso->err); 733 734 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 735 736 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 737 F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO; 738 F->ops->solve = MatSolve_MKL_CPARDISO; 739 F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO; 740 F->ops->matsolve = MatMatSolve_MKL_CPARDISO; 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatView_MKL_CPARDISO" 746 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer) 747 { 748 PetscErrorCode ierr; 749 PetscBool iascii; 750 PetscViewerFormat format; 751 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr; 752 PetscInt i; 753 754 PetscFunctionBegin; 755 /* check if matrix is mkl_cpardiso type */ 756 if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0); 757 758 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 759 if (iascii) { 760 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 761 if (format == PETSC_VIEWER_ASCII_INFO) { 762 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr); 763 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr); 764 for(i = 1; i <= 64; i++){ 765 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr); 766 } 767 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr); 768 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr); 769 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr); 770 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr); 771 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 772 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr); 773 } 774 } 775 PetscFunctionReturn(0); 776 } 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatGetInfo_MKL_CPARDISO" 780 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info) 781 { 782 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->spptr; 783 784 PetscFunctionBegin; 785 info->block_size = 1.0; 786 info->nz_allocated = mat_mkl_cpardiso->nz + 0.0; 787 info->nz_unneeded = 0.0; 788 info->assemblies = 0.0; 789 info->mallocs = 0.0; 790 info->memory = 0.0; 791 info->fill_ratio_given = 0; 792 info->fill_ratio_needed = 0; 793 info->factor_mallocs = 0; 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatMkl_CPardisoSetCntl_MKL_CPARDISO" 799 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival) 800 { 801 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->spptr; 802 803 PetscFunctionBegin; 804 if(icntl <= 64){ 805 mat_mkl_cpardiso->iparm[icntl - 1] = ival; 806 } else { 807 if(icntl == 65) 808 mkl_set_num_threads((int)ival); 809 else if(icntl == 66) 810 mat_mkl_cpardiso->maxfct = ival; 811 else if(icntl == 67) 812 mat_mkl_cpardiso->mnum = ival; 813 else if(icntl == 68) 814 mat_mkl_cpardiso->msglvl = ival; 815 else if(icntl == 69){ 816 int pt[IPARM_SIZE]; 817 mat_mkl_cpardiso->mtype = ival; 818 #if defined(PETSC_USE_REAL_SINGLE) 819 mat_mkl_cpardiso->iparm[27] = 1; 820 #else 821 mat_mkl_cpardiso->iparm[27] = 0; 822 #endif 823 mat_mkl_cpardiso->iparm[34] = 1; 824 } 825 } 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatMkl_CPardisoSetCntl" 831 /*@ 832 MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters 833 834 Logically Collective on Mat 835 836 Input Parameters: 837 + F - the factored matrix obtained by calling MatGetFactor() 838 . icntl - index of Mkl_Pardiso parameter 839 - ival - value of Mkl_Pardiso parameter 840 841 Options Database: 842 . -mat_mkl_cpardiso_<icntl> <ival> 843 844 Level: beginner 845 846 References: 847 . Mkl_Pardiso Users' Guide 848 849 .seealso: MatGetFactor() 850 @*/ 851 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 852 { 853 PetscErrorCode ierr; 854 855 PetscFunctionBegin; 856 ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 857 PetscFunctionReturn(0); 858 } 859 860 #undef __FUNCT__ 861 #define __FUNCT__ "MatFactorGetSolverPackage_mkl_cpardiso" 862 static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type) 863 { 864 PetscFunctionBegin; 865 *type = MATSOLVERMKL_CPARDISO; 866 PetscFunctionReturn(0); 867 } 868 869 /* MatGetFactor for MPI AIJ matrices */ 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatGetFactor_mpiaij_mkl_cpardiso" 872 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F) 873 { 874 Mat B; 875 PetscErrorCode ierr; 876 Mat_MKL_CPARDISO *mat_mkl_cpardiso; 877 PetscBool isSeqAIJ; 878 879 PetscFunctionBegin; 880 /* Create the factorization matrix */ 881 882 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 883 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 884 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 885 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 886 887 ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr); 888 889 if (isSeqAIJ) { 890 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 891 mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO; 892 } else { 893 mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO; 894 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 895 } 896 897 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO; 898 B->ops->destroy = MatDestroy_MKL_CPARDISO; 899 900 B->ops->view = MatView_MKL_CPARDISO; 901 B->ops->getinfo = MatGetInfo_MKL_CPARDISO; 902 903 B->factortype = ftype; 904 B->assembled = PETSC_TRUE; /* required by -ksp_view */ 905 906 B->spptr = mat_mkl_cpardiso; 907 908 /* set solvertype */ 909 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 910 ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr); 911 912 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);CHKERRQ(ierr); 913 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr); 914 ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr); 915 916 *F = B; 917 PetscFunctionReturn(0); 918 } 919 920 #undef __FUNCT__ 921 #define __FUNCT__ "MatSolverPackageRegister_MKL_CPardiso" 922 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void) 923 { 924 PetscErrorCode ierr; 925 926 PetscFunctionBegin; 927 ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 928 ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 929 PetscFunctionReturn(0); 930 } 931