1 2 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> 4 5 #if defined(PETSC_HAVE_MKL_INTEL_ILP64) 6 #define MKL_ILP64 7 #endif 8 #include <mkl.h> 9 #include <mkl_cluster_sparse_solver.h> 10 11 /* 12 * Possible mkl_cpardiso phases that controls the execution of the solver. 13 * For more information check mkl_cpardiso manual. 14 */ 15 #define JOB_ANALYSIS 11 16 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 17 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 18 #define JOB_NUMERICAL_FACTORIZATION 22 19 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 20 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 21 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 22 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 23 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 24 #define JOB_RELEASE_OF_LU_MEMORY 0 25 #define JOB_RELEASE_OF_ALL_MEMORY -1 26 27 #define IPARM_SIZE 64 28 #define INT_TYPE MKL_INT 29 30 static const char *Err_MSG_CPardiso(int errNo){ 31 switch (errNo) { 32 case -1: 33 return "input inconsistent"; break; 34 case -2: 35 return "not enough memory"; break; 36 case -3: 37 return "reordering problem"; break; 38 case -4: 39 return "zero pivot, numerical factorization or iterative refinement problem"; break; 40 case -5: 41 return "unclassified (internal) error"; break; 42 case -6: 43 return "preordering failed (matrix types 11, 13 only)"; break; 44 case -7: 45 return "diagonal matrix problem"; break; 46 case -8: 47 return "32-bit integer overflow problem"; break; 48 case -9: 49 return "not enough memory for OOC"; break; 50 case -10: 51 return "problems with opening OOC temporary files"; break; 52 case -11: 53 return "read/write problems with the OOC data file"; break; 54 default : 55 return "unknown error"; 56 } 57 } 58 59 /* 60 * Internal data structure. 61 * For more information check mkl_cpardiso manual. 62 */ 63 64 typedef struct { 65 66 /* Configuration vector */ 67 INT_TYPE iparm[IPARM_SIZE]; 68 69 /* 70 * Internal mkl_cpardiso memory location. 71 * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak. 72 */ 73 void *pt[IPARM_SIZE]; 74 75 MPI_Comm comm_mkl_cpardiso; 76 77 /* Basic mkl_cpardiso info*/ 78 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 79 80 /* Matrix structure */ 81 PetscScalar *a; 82 83 INT_TYPE *ia, *ja; 84 85 /* Number of non-zero elements */ 86 INT_TYPE nz; 87 88 /* Row permutaton vector*/ 89 INT_TYPE *perm; 90 91 /* Define is matrix preserve sparce structure. */ 92 MatStructure matstruc; 93 94 PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**); 95 96 /* True if mkl_cpardiso function have been used. */ 97 PetscBool CleanUp; 98 } Mat_MKL_CPARDISO; 99 100 /* 101 * Copy the elements of matrix A. 102 * Input: 103 * - Mat A: MATSEQAIJ matrix 104 * - int shift: matrix index. 105 * - 0 for c representation 106 * - 1 for fortran representation 107 * - MatReuse reuse: 108 * - MAT_INITIAL_MATRIX: Create a new aij representation 109 * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values 110 * Output: 111 * - int *nnz: Number of nonzero-elements. 112 * - int **r pointer to i index 113 * - int **c pointer to j elements 114 * - MATRIXTYPE **v: Non-zero elements 115 */ 116 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 117 { 118 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 119 120 PetscFunctionBegin; 121 *v=aa->a; 122 if (reuse == MAT_INITIAL_MATRIX) { 123 *r = (INT_TYPE*)aa->i; 124 *c = (INT_TYPE*)aa->j; 125 *nnz = aa->nz; 126 } 127 PetscFunctionReturn(0); 128 } 129 130 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 131 { 132 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 133 PetscErrorCode ierr; 134 PetscInt rstart,nz,i,j,countA,countB; 135 PetscInt *row,*col; 136 const PetscScalar *av, *bv; 137 PetscScalar *val; 138 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 139 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 140 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 141 PetscInt colA_start,jB,jcol; 142 143 PetscFunctionBegin; 144 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 145 av=aa->a; bv=bb->a; 146 147 garray = mat->garray; 148 149 if (reuse == MAT_INITIAL_MATRIX) { 150 nz = aa->nz + bb->nz; 151 *nnz = nz; 152 ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr); 153 col = row + m + 1; 154 val = (PetscScalar*)(col + nz); 155 *r = row; *c = col; *v = val; 156 row[0] = 0; 157 } else { 158 row = *r; col = *c; val = *v; 159 } 160 161 nz = 0; 162 for (i=0; i<m; i++) { 163 row[i] = nz; 164 countA = ai[i+1] - ai[i]; 165 countB = bi[i+1] - bi[i]; 166 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 167 bjj = bj + bi[i]; 168 169 /* B part, smaller col index */ 170 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 171 jB = 0; 172 for (j=0; j<countB; j++) { 173 jcol = garray[bjj[j]]; 174 if (jcol > colA_start) { 175 jB = j; 176 break; 177 } 178 col[nz] = jcol; 179 val[nz++] = *bv++; 180 if (j==countB-1) jB = countB; 181 } 182 183 /* A part */ 184 for (j=0; j<countA; j++) { 185 col[nz] = rstart + ajj[j]; 186 val[nz++] = *av++; 187 } 188 189 /* B part, larger col index */ 190 for (j=jB; j<countB; j++) { 191 col[nz] = garray[bjj[j]]; 192 val[nz++] = *bv++; 193 } 194 } 195 row[m] = nz; 196 197 PetscFunctionReturn(0); 198 } 199 200 /* 201 * Free memory for Mat_MKL_CPARDISO structure and pointers to objects. 202 */ 203 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A) 204 { 205 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 206 PetscErrorCode ierr; 207 208 PetscFunctionBegin; 209 /* Terminate instance, deallocate memories */ 210 if (mat_mkl_cpardiso->CleanUp) { 211 mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 212 213 cluster_sparse_solver ( 214 mat_mkl_cpardiso->pt, 215 &mat_mkl_cpardiso->maxfct, 216 &mat_mkl_cpardiso->mnum, 217 &mat_mkl_cpardiso->mtype, 218 &mat_mkl_cpardiso->phase, 219 &mat_mkl_cpardiso->n, 220 NULL, 221 NULL, 222 NULL, 223 mat_mkl_cpardiso->perm, 224 &mat_mkl_cpardiso->nrhs, 225 mat_mkl_cpardiso->iparm, 226 &mat_mkl_cpardiso->msglvl, 227 NULL, 228 NULL, 229 &mat_mkl_cpardiso->comm_mkl_cpardiso, 230 (PetscInt*)&mat_mkl_cpardiso->err); 231 } 232 233 if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) { 234 ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr); 235 } 236 ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 237 ierr = PetscFree(A->data);CHKERRQ(ierr); 238 239 /* clear composed functions */ 240 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 241 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr); 242 PetscFunctionReturn(0); 243 } 244 245 /* 246 * Computes Ax = b 247 */ 248 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x) 249 { 250 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 251 PetscErrorCode ierr; 252 PetscScalar *xarray; 253 const PetscScalar *barray; 254 255 PetscFunctionBegin; 256 mat_mkl_cpardiso->nrhs = 1; 257 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 258 ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 259 260 /* solve phase */ 261 /*-------------*/ 262 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 263 cluster_sparse_solver ( 264 mat_mkl_cpardiso->pt, 265 &mat_mkl_cpardiso->maxfct, 266 &mat_mkl_cpardiso->mnum, 267 &mat_mkl_cpardiso->mtype, 268 &mat_mkl_cpardiso->phase, 269 &mat_mkl_cpardiso->n, 270 mat_mkl_cpardiso->a, 271 mat_mkl_cpardiso->ia, 272 mat_mkl_cpardiso->ja, 273 mat_mkl_cpardiso->perm, 274 &mat_mkl_cpardiso->nrhs, 275 mat_mkl_cpardiso->iparm, 276 &mat_mkl_cpardiso->msglvl, 277 (void*)barray, 278 (void*)xarray, 279 &mat_mkl_cpardiso->comm_mkl_cpardiso, 280 (PetscInt*)&mat_mkl_cpardiso->err); 281 282 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 283 284 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 285 ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 286 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 287 PetscFunctionReturn(0); 288 } 289 290 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x) 291 { 292 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 293 PetscErrorCode ierr; 294 295 PetscFunctionBegin; 296 #if defined(PETSC_USE_COMPLEX) 297 mat_mkl_cpardiso->iparm[12 - 1] = 1; 298 #else 299 mat_mkl_cpardiso->iparm[12 - 1] = 2; 300 #endif 301 ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr); 302 mat_mkl_cpardiso->iparm[12 - 1] = 0; 303 PetscFunctionReturn(0); 304 } 305 306 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X) 307 { 308 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 309 PetscErrorCode ierr; 310 PetscScalar *xarray; 311 const PetscScalar *barray; 312 313 PetscFunctionBegin; 314 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 315 316 if(mat_mkl_cpardiso->nrhs > 0){ 317 ierr = MatDenseGetArrayRead(B,&barray); 318 ierr = MatDenseGetArray(X,&xarray); 319 320 /* solve phase */ 321 /*-------------*/ 322 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 323 cluster_sparse_solver ( 324 mat_mkl_cpardiso->pt, 325 &mat_mkl_cpardiso->maxfct, 326 &mat_mkl_cpardiso->mnum, 327 &mat_mkl_cpardiso->mtype, 328 &mat_mkl_cpardiso->phase, 329 &mat_mkl_cpardiso->n, 330 mat_mkl_cpardiso->a, 331 mat_mkl_cpardiso->ia, 332 mat_mkl_cpardiso->ja, 333 mat_mkl_cpardiso->perm, 334 &mat_mkl_cpardiso->nrhs, 335 mat_mkl_cpardiso->iparm, 336 &mat_mkl_cpardiso->msglvl, 337 (void*)barray, 338 (void*)xarray, 339 &mat_mkl_cpardiso->comm_mkl_cpardiso, 340 (PetscInt*)&mat_mkl_cpardiso->err); 341 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 342 ierr = MatDenseRestoreArrayRead(B,&barray); 343 ierr = MatDenseRestoreArray(X,&xarray); 344 345 } 346 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 347 PetscFunctionReturn(0); 348 349 } 350 351 /* 352 * LU Decomposition 353 */ 354 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info) 355 { 356 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data; 357 PetscErrorCode ierr; 358 359 PetscFunctionBegin; 360 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 361 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 362 363 mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION; 364 cluster_sparse_solver ( 365 mat_mkl_cpardiso->pt, 366 &mat_mkl_cpardiso->maxfct, 367 &mat_mkl_cpardiso->mnum, 368 &mat_mkl_cpardiso->mtype, 369 &mat_mkl_cpardiso->phase, 370 &mat_mkl_cpardiso->n, 371 mat_mkl_cpardiso->a, 372 mat_mkl_cpardiso->ia, 373 mat_mkl_cpardiso->ja, 374 mat_mkl_cpardiso->perm, 375 &mat_mkl_cpardiso->nrhs, 376 mat_mkl_cpardiso->iparm, 377 &mat_mkl_cpardiso->msglvl, 378 NULL, 379 NULL, 380 &mat_mkl_cpardiso->comm_mkl_cpardiso, 381 &mat_mkl_cpardiso->err); 382 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 383 384 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 385 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 386 PetscFunctionReturn(0); 387 } 388 389 /* Sets mkl_cpardiso options from the options database */ 390 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A) 391 { 392 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 393 PetscErrorCode ierr; 394 PetscInt icntl,threads; 395 PetscBool flg; 396 397 PetscFunctionBegin; 398 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr); 399 ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr); 400 if (flg) mkl_set_num_threads((int)threads); 401 402 ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr); 403 if (flg) mat_mkl_cpardiso->maxfct = icntl; 404 405 ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr); 406 if (flg) mat_mkl_cpardiso->mnum = icntl; 407 408 ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr); 409 if (flg) mat_mkl_cpardiso->msglvl = icntl; 410 411 ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr); 412 if(flg){ 413 mat_mkl_cpardiso->mtype = icntl; 414 #if defined(PETSC_USE_REAL_SINGLE) 415 mat_mkl_cpardiso->iparm[27] = 1; 416 #else 417 mat_mkl_cpardiso->iparm[27] = 0; 418 #endif 419 mat_mkl_cpardiso->iparm[34] = 1; 420 } 421 ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr); 422 423 if(flg && icntl != 0){ 424 ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr); 425 if (flg) mat_mkl_cpardiso->iparm[1] = icntl; 426 427 ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr); 428 if (flg) mat_mkl_cpardiso->iparm[3] = icntl; 429 430 ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr); 431 if (flg) mat_mkl_cpardiso->iparm[4] = icntl; 432 433 ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr); 434 if (flg) mat_mkl_cpardiso->iparm[5] = icntl; 435 436 ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr); 437 if (flg) mat_mkl_cpardiso->iparm[7] = icntl; 438 439 ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr); 440 if (flg) mat_mkl_cpardiso->iparm[9] = icntl; 441 442 ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr); 443 if (flg) mat_mkl_cpardiso->iparm[10] = icntl; 444 445 ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr); 446 if (flg) mat_mkl_cpardiso->iparm[11] = icntl; 447 448 ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl, 449 &flg);CHKERRQ(ierr); 450 if (flg) mat_mkl_cpardiso->iparm[12] = icntl; 451 452 ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl, 453 &flg);CHKERRQ(ierr); 454 if (flg) mat_mkl_cpardiso->iparm[17] = icntl; 455 456 ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr); 457 if (flg) mat_mkl_cpardiso->iparm[18] = icntl; 458 459 ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr); 460 if (flg) mat_mkl_cpardiso->iparm[20] = icntl; 461 462 ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr); 463 if (flg) mat_mkl_cpardiso->iparm[23] = icntl; 464 465 ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr); 466 if (flg) mat_mkl_cpardiso->iparm[24] = icntl; 467 468 ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr); 469 if (flg) mat_mkl_cpardiso->iparm[26] = icntl; 470 471 ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr); 472 if (flg) mat_mkl_cpardiso->iparm[30] = icntl; 473 474 ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr); 475 if (flg) mat_mkl_cpardiso->iparm[33] = icntl; 476 477 ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr); 478 if (flg) mat_mkl_cpardiso->iparm[59] = icntl; 479 } 480 481 ierr = PetscOptionsEnd();CHKERRQ(ierr); 482 PetscFunctionReturn(0); 483 } 484 485 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso) 486 { 487 PetscErrorCode ierr; 488 PetscMPIInt size; 489 490 PetscFunctionBegin; 491 492 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 493 ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr); 494 495 mat_mkl_cpardiso->CleanUp = PETSC_FALSE; 496 mat_mkl_cpardiso->maxfct = 1; 497 mat_mkl_cpardiso->mnum = 1; 498 mat_mkl_cpardiso->n = A->rmap->N; 499 mat_mkl_cpardiso->msglvl = 0; 500 mat_mkl_cpardiso->nrhs = 1; 501 mat_mkl_cpardiso->err = 0; 502 mat_mkl_cpardiso->phase = -1; 503 #if defined(PETSC_USE_COMPLEX) 504 mat_mkl_cpardiso->mtype = 13; 505 #else 506 mat_mkl_cpardiso->mtype = 11; 507 #endif 508 509 #if defined(PETSC_USE_REAL_SINGLE) 510 mat_mkl_cpardiso->iparm[27] = 1; 511 #else 512 mat_mkl_cpardiso->iparm[27] = 0; 513 #endif 514 515 mat_mkl_cpardiso->iparm[34] = 1; /* C style */ 516 517 mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 518 mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */ 519 mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */ 520 mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */ 521 mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 522 mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 523 mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 524 mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 525 mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 526 mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */ 527 528 mat_mkl_cpardiso->iparm[39] = 0; 529 if (size > 1) { 530 mat_mkl_cpardiso->iparm[39] = 2; 531 mat_mkl_cpardiso->iparm[40] = A->rmap->rstart; 532 mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1; 533 } 534 mat_mkl_cpardiso->perm = 0; 535 PetscFunctionReturn(0); 536 } 537 538 /* 539 * Symbolic decomposition. Mkl_Pardiso analysis phase. 540 */ 541 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 542 { 543 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 544 PetscErrorCode ierr; 545 546 PetscFunctionBegin; 547 mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 548 549 /* Set MKL_CPARDISO options from the options database */ 550 ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr); 551 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 552 553 mat_mkl_cpardiso->n = A->rmap->N; 554 555 /* analysis phase */ 556 /*----------------*/ 557 mat_mkl_cpardiso->phase = JOB_ANALYSIS; 558 559 cluster_sparse_solver ( 560 mat_mkl_cpardiso->pt, 561 &mat_mkl_cpardiso->maxfct, 562 &mat_mkl_cpardiso->mnum, 563 &mat_mkl_cpardiso->mtype, 564 &mat_mkl_cpardiso->phase, 565 &mat_mkl_cpardiso->n, 566 mat_mkl_cpardiso->a, 567 mat_mkl_cpardiso->ia, 568 mat_mkl_cpardiso->ja, 569 mat_mkl_cpardiso->perm, 570 &mat_mkl_cpardiso->nrhs, 571 mat_mkl_cpardiso->iparm, 572 &mat_mkl_cpardiso->msglvl, 573 NULL, 574 NULL, 575 &mat_mkl_cpardiso->comm_mkl_cpardiso, 576 (PetscInt*)&mat_mkl_cpardiso->err); 577 578 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 579 580 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 581 F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO; 582 F->ops->solve = MatSolve_MKL_CPARDISO; 583 F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO; 584 F->ops->matsolve = MatMatSolve_MKL_CPARDISO; 585 PetscFunctionReturn(0); 586 } 587 588 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer) 589 { 590 PetscErrorCode ierr; 591 PetscBool iascii; 592 PetscViewerFormat format; 593 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 594 PetscInt i; 595 596 PetscFunctionBegin; 597 /* check if matrix is mkl_cpardiso type */ 598 if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0); 599 600 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 601 if (iascii) { 602 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 603 if (format == PETSC_VIEWER_ASCII_INFO) { 604 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr); 605 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr); 606 for(i = 1; i <= 64; i++){ 607 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr); 608 } 609 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr); 610 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr); 611 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr); 612 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr); 613 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 614 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr); 615 } 616 } 617 PetscFunctionReturn(0); 618 } 619 620 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info) 621 { 622 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data; 623 624 PetscFunctionBegin; 625 info->block_size = 1.0; 626 info->nz_allocated = mat_mkl_cpardiso->nz + 0.0; 627 info->nz_unneeded = 0.0; 628 info->assemblies = 0.0; 629 info->mallocs = 0.0; 630 info->memory = 0.0; 631 info->fill_ratio_given = 0; 632 info->fill_ratio_needed = 0; 633 info->factor_mallocs = 0; 634 PetscFunctionReturn(0); 635 } 636 637 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival) 638 { 639 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data; 640 641 PetscFunctionBegin; 642 if(icntl <= 64){ 643 mat_mkl_cpardiso->iparm[icntl - 1] = ival; 644 } else { 645 if(icntl == 65) mkl_set_num_threads((int)ival); 646 else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival; 647 else if(icntl == 67) mat_mkl_cpardiso->mnum = ival; 648 else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival; 649 else if(icntl == 69){ 650 mat_mkl_cpardiso->mtype = ival; 651 #if defined(PETSC_USE_REAL_SINGLE) 652 mat_mkl_cpardiso->iparm[27] = 1; 653 #else 654 mat_mkl_cpardiso->iparm[27] = 0; 655 #endif 656 mat_mkl_cpardiso->iparm[34] = 1; 657 } 658 } 659 PetscFunctionReturn(0); 660 } 661 662 /*@ 663 MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters 664 665 Logically Collective on Mat 666 667 Input Parameters: 668 + F - the factored matrix obtained by calling MatGetFactor() 669 . icntl - index of Mkl_Pardiso parameter 670 - ival - value of Mkl_Pardiso parameter 671 672 Options Database: 673 . -mat_mkl_cpardiso_<icntl> <ival> 674 675 Level: Intermediate 676 677 Notes: 678 This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options 679 database approach when working with TS, SNES, or KSP. 680 681 References: 682 . Mkl_Pardiso Users' Guide 683 684 .seealso: MatGetFactor() 685 @*/ 686 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 687 { 688 PetscErrorCode ierr; 689 690 PetscFunctionBegin; 691 ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type) 696 { 697 PetscFunctionBegin; 698 *type = MATSOLVERMKL_CPARDISO; 699 PetscFunctionReturn(0); 700 } 701 702 /* MatGetFactor for MPI AIJ matrices */ 703 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F) 704 { 705 Mat B; 706 PetscErrorCode ierr; 707 Mat_MKL_CPARDISO *mat_mkl_cpardiso; 708 PetscBool isSeqAIJ; 709 710 PetscFunctionBegin; 711 /* Create the factorization matrix */ 712 713 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 714 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 715 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 716 ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr); 717 ierr = MatSetUp(B);CHKERRQ(ierr); 718 719 ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr); 720 721 if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO; 722 else mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO; 723 724 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO; 725 B->ops->destroy = MatDestroy_MKL_CPARDISO; 726 727 B->ops->view = MatView_MKL_CPARDISO; 728 B->ops->getinfo = MatGetInfo_MKL_CPARDISO; 729 730 B->factortype = ftype; 731 B->assembled = PETSC_TRUE; /* required by -ksp_view */ 732 733 B->data = mat_mkl_cpardiso; 734 735 /* set solvertype */ 736 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 737 ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr); 738 739 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);CHKERRQ(ierr); 740 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr); 741 ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr); 742 743 *F = B; 744 PetscFunctionReturn(0); 745 } 746 747 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void) 748 { 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 753 ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 754 PetscFunctionReturn(0); 755 } 756