1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <math.h> 11 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 12 #define MKL_ILP64 13 #endif 14 #include <mkl.h> 15 #include <mkl_cluster_sparse_solver.h> 16 17 /* 18 * Possible mkl_cpardiso phases that controls the execution of the solver. 19 * For more information check mkl_cpardiso manual. 20 */ 21 #define JOB_ANALYSIS 11 22 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 23 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 24 #define JOB_NUMERICAL_FACTORIZATION 22 25 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 26 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 27 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 28 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 29 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 30 #define JOB_RELEASE_OF_LU_MEMORY 0 31 #define JOB_RELEASE_OF_ALL_MEMORY -1 32 33 #define IPARM_SIZE 64 34 #define INT_TYPE MKL_INT 35 36 static const char *Err_MSG_CPardiso(int errNo){ 37 switch (errNo) { 38 case -1: 39 return "input inconsistent"; break; 40 case -2: 41 return "not enough memory"; break; 42 case -3: 43 return "reordering problem"; break; 44 case -4: 45 return "zero pivot, numerical factorization or iterative refinement problem"; break; 46 case -5: 47 return "unclassified (internal) error"; break; 48 case -6: 49 return "preordering failed (matrix types 11, 13 only)"; break; 50 case -7: 51 return "diagonal matrix problem"; break; 52 case -8: 53 return "32-bit integer overflow problem"; break; 54 case -9: 55 return "not enough memory for OOC"; break; 56 case -10: 57 return "problems with opening OOC temporary files"; break; 58 case -11: 59 return "read/write problems with the OOC data file"; break; 60 default : 61 return "unknown error"; 62 } 63 } 64 65 /* 66 * Internal data structure. 67 * For more information check mkl_cpardiso manual. 68 */ 69 70 typedef struct { 71 72 /* Configuration vector */ 73 INT_TYPE iparm[IPARM_SIZE]; 74 75 /* 76 * Internal mkl_cpardiso memory location. 77 * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak. 78 */ 79 void *pt[IPARM_SIZE]; 80 81 MPI_Comm comm_mkl_cpardiso; 82 83 /* Basic mkl_cpardiso info*/ 84 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 85 86 /* Matrix structure */ 87 PetscScalar *a; 88 89 INT_TYPE *ia, *ja; 90 91 /* Number of non-zero elements */ 92 INT_TYPE nz; 93 94 /* Row permutaton vector*/ 95 INT_TYPE *perm; 96 97 /* Define is matrix preserve sparce structure. */ 98 MatStructure matstruc; 99 100 PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**); 101 102 /* True if mkl_cpardiso function have been used. */ 103 PetscBool CleanUp; 104 } Mat_MKL_CPARDISO; 105 106 /* 107 * Copy the elements of matrix A. 108 * Input: 109 * - Mat A: MATSEQAIJ matrix 110 * - int shift: matrix index. 111 * - 0 for c representation 112 * - 1 for fortran representation 113 * - MatReuse reuse: 114 * - MAT_INITIAL_MATRIX: Create a new aij representation 115 * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values 116 * Output: 117 * - int *nnz: Number of nonzero-elements. 118 * - int **r pointer to i index 119 * - int **c pointer to j elements 120 * - MATRIXTYPE **v: Non-zero elements 121 */ 122 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 123 { 124 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 125 126 PetscFunctionBegin; 127 *v=aa->a; 128 if (reuse == MAT_INITIAL_MATRIX) { 129 *r = (INT_TYPE*)aa->i; 130 *c = (INT_TYPE*)aa->j; 131 *nnz = aa->nz; 132 } 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 137 { 138 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 139 PetscErrorCode ierr; 140 PetscInt rstart,nz,i,j,countA,countB; 141 PetscInt *row,*col; 142 const PetscScalar *av, *bv; 143 PetscScalar *val; 144 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 145 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 146 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 147 PetscInt colA_start,jB,jcol; 148 149 PetscFunctionBegin; 150 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 151 av=aa->a; bv=bb->a; 152 153 garray = mat->garray; 154 155 if (reuse == MAT_INITIAL_MATRIX) { 156 nz = aa->nz + bb->nz; 157 *nnz = nz; 158 ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr); 159 col = row + m + 1; 160 val = (PetscScalar*)(col + nz); 161 *r = row; *c = col; *v = val; 162 row[0] = 0; 163 } else { 164 row = *r; col = *c; val = *v; 165 } 166 167 nz = 0; 168 for (i=0; i<m; i++) { 169 row[i] = nz; 170 countA = ai[i+1] - ai[i]; 171 countB = bi[i+1] - bi[i]; 172 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 173 bjj = bj + bi[i]; 174 175 /* B part, smaller col index */ 176 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 177 jB = 0; 178 for (j=0; j<countB; j++) { 179 jcol = garray[bjj[j]]; 180 if (jcol > colA_start) { 181 jB = j; 182 break; 183 } 184 col[nz] = jcol; 185 val[nz++] = *bv++; 186 if (j==countB-1) jB = countB; 187 } 188 189 /* A part */ 190 for (j=0; j<countA; j++) { 191 col[nz] = rstart + ajj[j]; 192 val[nz++] = *av++; 193 } 194 195 /* B part, larger col index */ 196 for (j=jB; j<countB; j++) { 197 col[nz] = garray[bjj[j]]; 198 val[nz++] = *bv++; 199 } 200 } 201 row[m] = nz; 202 203 PetscFunctionReturn(0); 204 } 205 206 /* 207 * Free memory for Mat_MKL_CPARDISO structure and pointers to objects. 208 */ 209 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A) 210 { 211 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 212 PetscErrorCode ierr; 213 214 PetscFunctionBegin; 215 /* Terminate instance, deallocate memories */ 216 if (mat_mkl_cpardiso->CleanUp) { 217 mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 218 219 cluster_sparse_solver ( 220 mat_mkl_cpardiso->pt, 221 &mat_mkl_cpardiso->maxfct, 222 &mat_mkl_cpardiso->mnum, 223 &mat_mkl_cpardiso->mtype, 224 &mat_mkl_cpardiso->phase, 225 &mat_mkl_cpardiso->n, 226 NULL, 227 NULL, 228 NULL, 229 mat_mkl_cpardiso->perm, 230 &mat_mkl_cpardiso->nrhs, 231 mat_mkl_cpardiso->iparm, 232 &mat_mkl_cpardiso->msglvl, 233 NULL, 234 NULL, 235 &mat_mkl_cpardiso->comm_mkl_cpardiso, 236 (PetscInt*)&mat_mkl_cpardiso->err); 237 } 238 239 if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) { 240 ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr); 241 } 242 ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 243 ierr = PetscFree(A->data);CHKERRQ(ierr); 244 245 /* clear composed functions */ 246 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 247 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr); 248 PetscFunctionReturn(0); 249 } 250 251 /* 252 * Computes Ax = b 253 */ 254 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x) 255 { 256 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 257 PetscErrorCode ierr; 258 PetscScalar *xarray; 259 const PetscScalar *barray; 260 261 PetscFunctionBegin; 262 mat_mkl_cpardiso->nrhs = 1; 263 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 264 ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 265 266 /* solve phase */ 267 /*-------------*/ 268 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 269 cluster_sparse_solver ( 270 mat_mkl_cpardiso->pt, 271 &mat_mkl_cpardiso->maxfct, 272 &mat_mkl_cpardiso->mnum, 273 &mat_mkl_cpardiso->mtype, 274 &mat_mkl_cpardiso->phase, 275 &mat_mkl_cpardiso->n, 276 mat_mkl_cpardiso->a, 277 mat_mkl_cpardiso->ia, 278 mat_mkl_cpardiso->ja, 279 mat_mkl_cpardiso->perm, 280 &mat_mkl_cpardiso->nrhs, 281 mat_mkl_cpardiso->iparm, 282 &mat_mkl_cpardiso->msglvl, 283 (void*)barray, 284 (void*)xarray, 285 &mat_mkl_cpardiso->comm_mkl_cpardiso, 286 (PetscInt*)&mat_mkl_cpardiso->err); 287 288 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 289 290 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 291 ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 292 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 293 PetscFunctionReturn(0); 294 } 295 296 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x) 297 { 298 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 299 PetscErrorCode ierr; 300 301 PetscFunctionBegin; 302 #if defined(PETSC_USE_COMPLEX) 303 mat_mkl_cpardiso->iparm[12 - 1] = 1; 304 #else 305 mat_mkl_cpardiso->iparm[12 - 1] = 2; 306 #endif 307 ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr); 308 mat_mkl_cpardiso->iparm[12 - 1] = 0; 309 PetscFunctionReturn(0); 310 } 311 312 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X) 313 { 314 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 315 PetscErrorCode ierr; 316 PetscScalar *xarray; 317 const PetscScalar *barray; 318 319 PetscFunctionBegin; 320 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 321 322 if(mat_mkl_cpardiso->nrhs > 0){ 323 ierr = MatDenseGetArrayRead(B,&barray); 324 ierr = MatDenseGetArray(X,&xarray); 325 326 /* solve phase */ 327 /*-------------*/ 328 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 329 cluster_sparse_solver ( 330 mat_mkl_cpardiso->pt, 331 &mat_mkl_cpardiso->maxfct, 332 &mat_mkl_cpardiso->mnum, 333 &mat_mkl_cpardiso->mtype, 334 &mat_mkl_cpardiso->phase, 335 &mat_mkl_cpardiso->n, 336 mat_mkl_cpardiso->a, 337 mat_mkl_cpardiso->ia, 338 mat_mkl_cpardiso->ja, 339 mat_mkl_cpardiso->perm, 340 &mat_mkl_cpardiso->nrhs, 341 mat_mkl_cpardiso->iparm, 342 &mat_mkl_cpardiso->msglvl, 343 (void*)barray, 344 (void*)xarray, 345 &mat_mkl_cpardiso->comm_mkl_cpardiso, 346 (PetscInt*)&mat_mkl_cpardiso->err); 347 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 348 ierr = MatDenseRestoreArrayRead(B,&barray); 349 ierr = MatDenseRestoreArray(X,&xarray); 350 351 } 352 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 353 PetscFunctionReturn(0); 354 355 } 356 357 /* 358 * LU Decomposition 359 */ 360 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info) 361 { 362 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data; 363 PetscErrorCode ierr; 364 365 PetscFunctionBegin; 366 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 367 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 368 369 mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION; 370 cluster_sparse_solver ( 371 mat_mkl_cpardiso->pt, 372 &mat_mkl_cpardiso->maxfct, 373 &mat_mkl_cpardiso->mnum, 374 &mat_mkl_cpardiso->mtype, 375 &mat_mkl_cpardiso->phase, 376 &mat_mkl_cpardiso->n, 377 mat_mkl_cpardiso->a, 378 mat_mkl_cpardiso->ia, 379 mat_mkl_cpardiso->ja, 380 mat_mkl_cpardiso->perm, 381 &mat_mkl_cpardiso->nrhs, 382 mat_mkl_cpardiso->iparm, 383 &mat_mkl_cpardiso->msglvl, 384 NULL, 385 NULL, 386 &mat_mkl_cpardiso->comm_mkl_cpardiso, 387 &mat_mkl_cpardiso->err); 388 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 389 390 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 391 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 392 PetscFunctionReturn(0); 393 } 394 395 /* Sets mkl_cpardiso options from the options database */ 396 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A) 397 { 398 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 399 PetscErrorCode ierr; 400 PetscInt icntl,threads; 401 PetscBool flg; 402 403 PetscFunctionBegin; 404 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr); 405 ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr); 406 if (flg) mkl_set_num_threads((int)threads); 407 408 ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr); 409 if (flg) mat_mkl_cpardiso->maxfct = icntl; 410 411 ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr); 412 if (flg) mat_mkl_cpardiso->mnum = icntl; 413 414 ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr); 415 if (flg) mat_mkl_cpardiso->msglvl = icntl; 416 417 ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr); 418 if(flg){ 419 mat_mkl_cpardiso->mtype = icntl; 420 #if defined(PETSC_USE_REAL_SINGLE) 421 mat_mkl_cpardiso->iparm[27] = 1; 422 #else 423 mat_mkl_cpardiso->iparm[27] = 0; 424 #endif 425 mat_mkl_cpardiso->iparm[34] = 1; 426 } 427 ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr); 428 429 if(flg && icntl != 0){ 430 ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr); 431 if (flg) mat_mkl_cpardiso->iparm[1] = icntl; 432 433 ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr); 434 if (flg) mat_mkl_cpardiso->iparm[3] = icntl; 435 436 ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr); 437 if (flg) mat_mkl_cpardiso->iparm[4] = icntl; 438 439 ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr); 440 if (flg) mat_mkl_cpardiso->iparm[5] = icntl; 441 442 ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr); 443 if (flg) mat_mkl_cpardiso->iparm[7] = icntl; 444 445 ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr); 446 if (flg) mat_mkl_cpardiso->iparm[9] = icntl; 447 448 ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr); 449 if (flg) mat_mkl_cpardiso->iparm[10] = icntl; 450 451 ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr); 452 if (flg) mat_mkl_cpardiso->iparm[11] = icntl; 453 454 ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl, 455 &flg);CHKERRQ(ierr); 456 if (flg) mat_mkl_cpardiso->iparm[12] = icntl; 457 458 ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl, 459 &flg);CHKERRQ(ierr); 460 if (flg) mat_mkl_cpardiso->iparm[17] = icntl; 461 462 ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr); 463 if (flg) mat_mkl_cpardiso->iparm[18] = icntl; 464 465 ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr); 466 if (flg) mat_mkl_cpardiso->iparm[20] = icntl; 467 468 ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr); 469 if (flg) mat_mkl_cpardiso->iparm[23] = icntl; 470 471 ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr); 472 if (flg) mat_mkl_cpardiso->iparm[24] = icntl; 473 474 ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr); 475 if (flg) mat_mkl_cpardiso->iparm[26] = icntl; 476 477 ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr); 478 if (flg) mat_mkl_cpardiso->iparm[30] = icntl; 479 480 ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr); 481 if (flg) mat_mkl_cpardiso->iparm[33] = icntl; 482 483 ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr); 484 if (flg) mat_mkl_cpardiso->iparm[59] = icntl; 485 } 486 487 ierr = PetscOptionsEnd();CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso) 492 { 493 PetscErrorCode ierr; 494 PetscMPIInt size; 495 496 PetscFunctionBegin; 497 498 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 499 ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr); 500 501 mat_mkl_cpardiso->CleanUp = PETSC_FALSE; 502 mat_mkl_cpardiso->maxfct = 1; 503 mat_mkl_cpardiso->mnum = 1; 504 mat_mkl_cpardiso->n = A->rmap->N; 505 mat_mkl_cpardiso->msglvl = 0; 506 mat_mkl_cpardiso->nrhs = 1; 507 mat_mkl_cpardiso->err = 0; 508 mat_mkl_cpardiso->phase = -1; 509 #if defined(PETSC_USE_COMPLEX) 510 mat_mkl_cpardiso->mtype = 13; 511 #else 512 mat_mkl_cpardiso->mtype = 11; 513 #endif 514 515 #if defined(PETSC_USE_REAL_SINGLE) 516 mat_mkl_cpardiso->iparm[27] = 1; 517 #else 518 mat_mkl_cpardiso->iparm[27] = 0; 519 #endif 520 521 mat_mkl_cpardiso->iparm[34] = 1; /* C style */ 522 523 mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 524 mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */ 525 mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */ 526 mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */ 527 mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 528 mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 529 mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 530 mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 531 mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 532 mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */ 533 534 mat_mkl_cpardiso->iparm[39] = 0; 535 if (size > 1) { 536 mat_mkl_cpardiso->iparm[39] = 2; 537 mat_mkl_cpardiso->iparm[40] = A->rmap->rstart; 538 mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1; 539 } 540 mat_mkl_cpardiso->perm = 0; 541 PetscFunctionReturn(0); 542 } 543 544 /* 545 * Symbolic decomposition. Mkl_Pardiso analysis phase. 546 */ 547 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 548 { 549 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 550 PetscErrorCode ierr; 551 552 PetscFunctionBegin; 553 mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 554 555 /* Set MKL_CPARDISO options from the options database */ 556 ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr); 557 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 558 559 mat_mkl_cpardiso->n = A->rmap->N; 560 561 /* analysis phase */ 562 /*----------------*/ 563 mat_mkl_cpardiso->phase = JOB_ANALYSIS; 564 565 cluster_sparse_solver ( 566 mat_mkl_cpardiso->pt, 567 &mat_mkl_cpardiso->maxfct, 568 &mat_mkl_cpardiso->mnum, 569 &mat_mkl_cpardiso->mtype, 570 &mat_mkl_cpardiso->phase, 571 &mat_mkl_cpardiso->n, 572 mat_mkl_cpardiso->a, 573 mat_mkl_cpardiso->ia, 574 mat_mkl_cpardiso->ja, 575 mat_mkl_cpardiso->perm, 576 &mat_mkl_cpardiso->nrhs, 577 mat_mkl_cpardiso->iparm, 578 &mat_mkl_cpardiso->msglvl, 579 NULL, 580 NULL, 581 &mat_mkl_cpardiso->comm_mkl_cpardiso, 582 (PetscInt*)&mat_mkl_cpardiso->err); 583 584 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 585 586 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 587 F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO; 588 F->ops->solve = MatSolve_MKL_CPARDISO; 589 F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO; 590 F->ops->matsolve = MatMatSolve_MKL_CPARDISO; 591 PetscFunctionReturn(0); 592 } 593 594 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer) 595 { 596 PetscErrorCode ierr; 597 PetscBool iascii; 598 PetscViewerFormat format; 599 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 600 PetscInt i; 601 602 PetscFunctionBegin; 603 /* check if matrix is mkl_cpardiso type */ 604 if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0); 605 606 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 607 if (iascii) { 608 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 609 if (format == PETSC_VIEWER_ASCII_INFO) { 610 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr); 611 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr); 612 for(i = 1; i <= 64; i++){ 613 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr); 614 } 615 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr); 616 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr); 617 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr); 618 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr); 619 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 620 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr); 621 } 622 } 623 PetscFunctionReturn(0); 624 } 625 626 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info) 627 { 628 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data; 629 630 PetscFunctionBegin; 631 info->block_size = 1.0; 632 info->nz_allocated = mat_mkl_cpardiso->nz + 0.0; 633 info->nz_unneeded = 0.0; 634 info->assemblies = 0.0; 635 info->mallocs = 0.0; 636 info->memory = 0.0; 637 info->fill_ratio_given = 0; 638 info->fill_ratio_needed = 0; 639 info->factor_mallocs = 0; 640 PetscFunctionReturn(0); 641 } 642 643 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival) 644 { 645 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data; 646 647 PetscFunctionBegin; 648 if(icntl <= 64){ 649 mat_mkl_cpardiso->iparm[icntl - 1] = ival; 650 } else { 651 if(icntl == 65) mkl_set_num_threads((int)ival); 652 else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival; 653 else if(icntl == 67) mat_mkl_cpardiso->mnum = ival; 654 else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival; 655 else if(icntl == 69){ 656 mat_mkl_cpardiso->mtype = ival; 657 #if defined(PETSC_USE_REAL_SINGLE) 658 mat_mkl_cpardiso->iparm[27] = 1; 659 #else 660 mat_mkl_cpardiso->iparm[27] = 0; 661 #endif 662 mat_mkl_cpardiso->iparm[34] = 1; 663 } 664 } 665 PetscFunctionReturn(0); 666 } 667 668 /*@ 669 MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters 670 671 Logically Collective on Mat 672 673 Input Parameters: 674 + F - the factored matrix obtained by calling MatGetFactor() 675 . icntl - index of Mkl_Pardiso parameter 676 - ival - value of Mkl_Pardiso parameter 677 678 Options Database: 679 . -mat_mkl_cpardiso_<icntl> <ival> 680 681 Level: Intermediate 682 683 Notes: 684 This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options 685 database approach when working with TS, SNES, or KSP. 686 687 References: 688 . Mkl_Pardiso Users' Guide 689 690 .seealso: MatGetFactor() 691 @*/ 692 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 693 { 694 PetscErrorCode ierr; 695 696 PetscFunctionBegin; 697 ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 698 PetscFunctionReturn(0); 699 } 700 701 static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type) 702 { 703 PetscFunctionBegin; 704 *type = MATSOLVERMKL_CPARDISO; 705 PetscFunctionReturn(0); 706 } 707 708 /* MatGetFactor for MPI AIJ matrices */ 709 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F) 710 { 711 Mat B; 712 PetscErrorCode ierr; 713 Mat_MKL_CPARDISO *mat_mkl_cpardiso; 714 PetscBool isSeqAIJ; 715 716 PetscFunctionBegin; 717 /* Create the factorization matrix */ 718 719 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 720 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 721 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 722 ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr); 723 ierr = MatSetUp(B);CHKERRQ(ierr); 724 725 ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr); 726 727 if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO; 728 else mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO; 729 730 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO; 731 B->ops->destroy = MatDestroy_MKL_CPARDISO; 732 733 B->ops->view = MatView_MKL_CPARDISO; 734 B->ops->getinfo = MatGetInfo_MKL_CPARDISO; 735 736 B->factortype = ftype; 737 B->assembled = PETSC_TRUE; /* required by -ksp_view */ 738 739 B->data = mat_mkl_cpardiso; 740 741 /* set solvertype */ 742 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 743 ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr); 744 745 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);CHKERRQ(ierr); 746 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr); 747 ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr); 748 749 *F = B; 750 PetscFunctionReturn(0); 751 } 752 753 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void) 754 { 755 PetscErrorCode ierr; 756 757 PetscFunctionBegin; 758 ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 759 ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 760 PetscFunctionReturn(0); 761 } 762