1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <math.h> 11 #include <mkl.h> 12 #include <mkl_cluster_sparse_solver.h> 13 14 /* 15 * Possible mkl_cpardiso phases that controls the execution of the solver. 16 * For more information check mkl_cpardiso manual. 17 */ 18 #define JOB_ANALYSIS 11 19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 21 #define JOB_NUMERICAL_FACTORIZATION 22 22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 27 #define JOB_RELEASE_OF_LU_MEMORY 0 28 #define JOB_RELEASE_OF_ALL_MEMORY -1 29 30 #define IPARM_SIZE 64 31 #define INT_TYPE MKL_INT 32 33 static const char *Err_MSG_CPardiso(int errNo){ 34 switch (errNo) { 35 case -1: 36 return "input inconsistent"; break; 37 case -2: 38 return "not enough memory"; break; 39 case -3: 40 return "reordering problem"; break; 41 case -4: 42 return "zero pivot, numerical factorization or iterative refinement problem"; break; 43 case -5: 44 return "unclassified (internal) error"; break; 45 case -6: 46 return "preordering failed (matrix types 11, 13 only)"; break; 47 case -7: 48 return "diagonal matrix problem"; break; 49 case -8: 50 return "32-bit integer overflow problem"; break; 51 case -9: 52 return "not enough memory for OOC"; break; 53 case -10: 54 return "problems with opening OOC temporary files"; break; 55 case -11: 56 return "read/write problems with the OOC data file"; break; 57 default : 58 return "unknown error"; 59 } 60 } 61 62 /* 63 * Internal data structure. 64 * For more information check mkl_cpardiso manual. 65 */ 66 67 typedef struct { 68 69 /* Configuration vector */ 70 INT_TYPE iparm[IPARM_SIZE]; 71 72 /* 73 * Internal mkl_cpardiso memory location. 74 * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak. 75 */ 76 void *pt[IPARM_SIZE]; 77 78 MPI_Comm comm_mkl_cpardiso; 79 80 /* Basic mkl_cpardiso info*/ 81 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 82 83 /* Matrix structure */ 84 PetscScalar *a; 85 86 INT_TYPE *ia, *ja; 87 88 /* Number of non-zero elements */ 89 INT_TYPE nz; 90 91 /* Row permutaton vector*/ 92 INT_TYPE *perm; 93 94 /* Define is matrix preserve sparce structure. */ 95 MatStructure matstruc; 96 97 PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**); 98 99 /* True if mkl_cpardiso function have been used. */ 100 PetscBool CleanUp; 101 } Mat_MKL_CPARDISO; 102 103 /* 104 * Copy the elements of matrix A. 105 * Input: 106 * - Mat A: MATSEQAIJ matrix 107 * - int shift: matrix index. 108 * - 0 for c representation 109 * - 1 for fortran representation 110 * - MatReuse reuse: 111 * - MAT_INITIAL_MATRIX: Create a new aij representation 112 * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values 113 * Output: 114 * - int *nnz: Number of nonzero-elements. 115 * - int **r pointer to i index 116 * - int **c pointer to j elements 117 * - MATRIXTYPE **v: Non-zero elements 118 */ 119 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 120 { 121 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 122 123 PetscFunctionBegin; 124 *v=aa->a; 125 if (reuse == MAT_INITIAL_MATRIX) { 126 *r = (INT_TYPE*)aa->i; 127 *c = (INT_TYPE*)aa->j; 128 *nnz = aa->nz; 129 } 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 134 { 135 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 136 PetscErrorCode ierr; 137 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 138 PetscInt *row,*col; 139 const PetscScalar *av, *bv,*v1,*v2; 140 PetscScalar *val; 141 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 142 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 143 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 144 PetscInt nn, colA_start,jB,jcol; 145 146 PetscFunctionBegin; 147 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 148 av=aa->a; bv=bb->a; 149 150 garray = mat->garray; 151 152 if (reuse == MAT_INITIAL_MATRIX) { 153 nz = aa->nz + bb->nz; 154 *nnz = nz; 155 ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr); 156 col = row + m + 1; 157 val = (PetscScalar*)(col + nz); 158 *r = row; *c = col; *v = val; 159 row[0] = 0; 160 } else { 161 row = *r; col = *c; val = *v; 162 } 163 164 nz = 0; 165 for (i=0; i<m; i++) { 166 row[i] = nz; 167 countA = ai[i+1] - ai[i]; 168 countB = bi[i+1] - bi[i]; 169 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 170 bjj = bj + bi[i]; 171 172 /* B part, smaller col index */ 173 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 174 jB = 0; 175 for (j=0; j<countB; j++) { 176 jcol = garray[bjj[j]]; 177 if (jcol > colA_start) { 178 jB = j; 179 break; 180 } 181 col[nz] = jcol; 182 val[nz++] = *bv++; 183 if (j==countB-1) jB = countB; 184 } 185 186 /* A part */ 187 for (j=0; j<countA; j++) { 188 col[nz] = rstart + ajj[j]; 189 val[nz++] = *av++; 190 } 191 192 /* B part, larger col index */ 193 for (j=jB; j<countB; j++) { 194 col[nz] = garray[bjj[j]]; 195 val[nz++] = *bv++; 196 } 197 } 198 row[m] = nz; 199 200 PetscFunctionReturn(0); 201 } 202 203 /* 204 * Free memory for Mat_MKL_CPARDISO structure and pointers to objects. 205 */ 206 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A) 207 { 208 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 /* Terminate instance, deallocate memories */ 213 if (mat_mkl_cpardiso->CleanUp) { 214 mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 215 216 cluster_sparse_solver ( 217 mat_mkl_cpardiso->pt, 218 &mat_mkl_cpardiso->maxfct, 219 &mat_mkl_cpardiso->mnum, 220 &mat_mkl_cpardiso->mtype, 221 &mat_mkl_cpardiso->phase, 222 &mat_mkl_cpardiso->n, 223 NULL, 224 NULL, 225 NULL, 226 mat_mkl_cpardiso->perm, 227 &mat_mkl_cpardiso->nrhs, 228 mat_mkl_cpardiso->iparm, 229 &mat_mkl_cpardiso->msglvl, 230 NULL, 231 NULL, 232 &mat_mkl_cpardiso->comm_mkl_cpardiso, 233 &mat_mkl_cpardiso->err); 234 } 235 236 if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) { 237 ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr); 238 } 239 ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 240 ierr = PetscFree(A->data);CHKERRQ(ierr); 241 242 /* clear composed functions */ 243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr); 244 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 248 /* 249 * Computes Ax = b 250 */ 251 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x) 252 { 253 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 254 PetscErrorCode ierr; 255 PetscScalar *xarray; 256 const PetscScalar *barray; 257 258 PetscFunctionBegin; 259 mat_mkl_cpardiso->nrhs = 1; 260 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 261 ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 262 263 /* solve phase */ 264 /*-------------*/ 265 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 266 cluster_sparse_solver ( 267 mat_mkl_cpardiso->pt, 268 &mat_mkl_cpardiso->maxfct, 269 &mat_mkl_cpardiso->mnum, 270 &mat_mkl_cpardiso->mtype, 271 &mat_mkl_cpardiso->phase, 272 &mat_mkl_cpardiso->n, 273 mat_mkl_cpardiso->a, 274 mat_mkl_cpardiso->ia, 275 mat_mkl_cpardiso->ja, 276 mat_mkl_cpardiso->perm, 277 &mat_mkl_cpardiso->nrhs, 278 mat_mkl_cpardiso->iparm, 279 &mat_mkl_cpardiso->msglvl, 280 (void*)barray, 281 (void*)xarray, 282 &mat_mkl_cpardiso->comm_mkl_cpardiso, 283 &mat_mkl_cpardiso->err); 284 285 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 286 287 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 288 ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 289 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 290 PetscFunctionReturn(0); 291 } 292 293 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x) 294 { 295 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 296 PetscErrorCode ierr; 297 298 PetscFunctionBegin; 299 #if defined(PETSC_USE_COMPLEX) 300 mat_mkl_cpardiso->iparm[12 - 1] = 1; 301 #else 302 mat_mkl_cpardiso->iparm[12 - 1] = 2; 303 #endif 304 ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr); 305 mat_mkl_cpardiso->iparm[12 - 1] = 0; 306 PetscFunctionReturn(0); 307 } 308 309 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X) 310 { 311 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 312 PetscErrorCode ierr; 313 PetscScalar *barray, *xarray; 314 PetscBool flg; 315 316 PetscFunctionBegin; 317 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr); 318 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix"); 319 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr); 320 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix"); 321 322 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 323 324 if(mat_mkl_cpardiso->nrhs > 0){ 325 ierr = MatDenseGetArray(B,&barray); 326 ierr = MatDenseGetArray(X,&xarray); 327 328 /* solve phase */ 329 /*-------------*/ 330 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 331 cluster_sparse_solver ( 332 mat_mkl_cpardiso->pt, 333 &mat_mkl_cpardiso->maxfct, 334 &mat_mkl_cpardiso->mnum, 335 &mat_mkl_cpardiso->mtype, 336 &mat_mkl_cpardiso->phase, 337 &mat_mkl_cpardiso->n, 338 mat_mkl_cpardiso->a, 339 mat_mkl_cpardiso->ia, 340 mat_mkl_cpardiso->ja, 341 mat_mkl_cpardiso->perm, 342 &mat_mkl_cpardiso->nrhs, 343 mat_mkl_cpardiso->iparm, 344 &mat_mkl_cpardiso->msglvl, 345 (void*)barray, 346 (void*)xarray, 347 &mat_mkl_cpardiso->comm_mkl_cpardiso, 348 &mat_mkl_cpardiso->err); 349 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 350 } 351 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 352 PetscFunctionReturn(0); 353 354 } 355 356 /* 357 * LU Decomposition 358 */ 359 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info) 360 { 361 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data; 362 PetscErrorCode ierr; 363 364 PetscFunctionBegin; 365 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 366 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 367 368 mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION; 369 cluster_sparse_solver ( 370 mat_mkl_cpardiso->pt, 371 &mat_mkl_cpardiso->maxfct, 372 &mat_mkl_cpardiso->mnum, 373 &mat_mkl_cpardiso->mtype, 374 &mat_mkl_cpardiso->phase, 375 &mat_mkl_cpardiso->n, 376 mat_mkl_cpardiso->a, 377 mat_mkl_cpardiso->ia, 378 mat_mkl_cpardiso->ja, 379 mat_mkl_cpardiso->perm, 380 &mat_mkl_cpardiso->nrhs, 381 mat_mkl_cpardiso->iparm, 382 &mat_mkl_cpardiso->msglvl, 383 NULL, 384 NULL, 385 &mat_mkl_cpardiso->comm_mkl_cpardiso, 386 &mat_mkl_cpardiso->err); 387 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 388 389 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 390 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 391 PetscFunctionReturn(0); 392 } 393 394 /* Sets mkl_cpardiso options from the options database */ 395 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A) 396 { 397 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 398 PetscErrorCode ierr; 399 PetscInt icntl; 400 PetscBool flg; 401 int pt[IPARM_SIZE], threads; 402 403 PetscFunctionBegin; 404 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr); 405 ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr); 406 if (flg) mkl_set_num_threads(threads); 407 408 ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr); 409 if (flg) mat_mkl_cpardiso->maxfct = icntl; 410 411 ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr); 412 if (flg) mat_mkl_cpardiso->mnum = icntl; 413 414 ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr); 415 if (flg) mat_mkl_cpardiso->msglvl = icntl; 416 417 ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr); 418 if(flg){ 419 mat_mkl_cpardiso->mtype = icntl; 420 #if defined(PETSC_USE_REAL_SINGLE) 421 mat_mkl_cpardiso->iparm[27] = 1; 422 #else 423 mat_mkl_cpardiso->iparm[27] = 0; 424 #endif 425 mat_mkl_cpardiso->iparm[34] = 1; 426 } 427 ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr); 428 429 if(flg && icntl != 0){ 430 ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr); 431 if (flg) mat_mkl_cpardiso->iparm[1] = icntl; 432 433 ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr); 434 if (flg) mat_mkl_cpardiso->iparm[3] = icntl; 435 436 ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr); 437 if (flg) mat_mkl_cpardiso->iparm[4] = icntl; 438 439 ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr); 440 if (flg) mat_mkl_cpardiso->iparm[5] = icntl; 441 442 ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr); 443 if (flg) mat_mkl_cpardiso->iparm[7] = icntl; 444 445 ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr); 446 if (flg) mat_mkl_cpardiso->iparm[9] = icntl; 447 448 ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr); 449 if (flg) mat_mkl_cpardiso->iparm[10] = icntl; 450 451 ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr); 452 if (flg) mat_mkl_cpardiso->iparm[11] = icntl; 453 454 ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl, 455 &flg);CHKERRQ(ierr); 456 if (flg) mat_mkl_cpardiso->iparm[12] = icntl; 457 458 ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl, 459 &flg);CHKERRQ(ierr); 460 if (flg) mat_mkl_cpardiso->iparm[17] = icntl; 461 462 ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr); 463 if (flg) mat_mkl_cpardiso->iparm[18] = icntl; 464 465 ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr); 466 if (flg) mat_mkl_cpardiso->iparm[20] = icntl; 467 468 ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr); 469 if (flg) mat_mkl_cpardiso->iparm[23] = icntl; 470 471 ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr); 472 if (flg) mat_mkl_cpardiso->iparm[24] = icntl; 473 474 ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr); 475 if (flg) mat_mkl_cpardiso->iparm[26] = icntl; 476 477 ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr); 478 if (flg) mat_mkl_cpardiso->iparm[30] = icntl; 479 480 ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr); 481 if (flg) mat_mkl_cpardiso->iparm[33] = icntl; 482 483 ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr); 484 if (flg) mat_mkl_cpardiso->iparm[59] = icntl; 485 } 486 487 ierr = PetscOptionsEnd();CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso) 492 { 493 PetscErrorCode ierr; 494 PetscInt i; 495 PetscMPIInt size; 496 497 PetscFunctionBegin; 498 499 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 500 ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr); 501 502 mat_mkl_cpardiso->CleanUp = PETSC_FALSE; 503 mat_mkl_cpardiso->maxfct = 1; 504 mat_mkl_cpardiso->mnum = 1; 505 mat_mkl_cpardiso->n = A->rmap->N; 506 mat_mkl_cpardiso->msglvl = 0; 507 mat_mkl_cpardiso->nrhs = 1; 508 mat_mkl_cpardiso->err = 0; 509 mat_mkl_cpardiso->phase = -1; 510 #if defined(PETSC_USE_COMPLEX) 511 mat_mkl_cpardiso->mtype = 13; 512 #else 513 mat_mkl_cpardiso->mtype = 11; 514 #endif 515 516 #if defined(PETSC_USE_REAL_SINGLE) 517 mat_mkl_cpardiso->iparm[27] = 1; 518 #else 519 mat_mkl_cpardiso->iparm[27] = 0; 520 #endif 521 522 mat_mkl_cpardiso->iparm[34] = 1; /* C style */ 523 524 mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 525 mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */ 526 mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */ 527 mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */ 528 mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 529 mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 530 mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 531 mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 532 mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 533 mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */ 534 535 mat_mkl_cpardiso->iparm[39] = 0; 536 if (size > 1) { 537 mat_mkl_cpardiso->iparm[39] = 2; 538 mat_mkl_cpardiso->iparm[40] = A->rmap->rstart; 539 mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1; 540 } 541 mat_mkl_cpardiso->perm = 0; 542 PetscFunctionReturn(0); 543 } 544 545 /* 546 * Symbolic decomposition. Mkl_Pardiso analysis phase. 547 */ 548 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 549 { 550 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 555 556 /* Set MKL_CPARDISO options from the options database */ 557 ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr); 558 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 559 560 mat_mkl_cpardiso->n = A->rmap->N; 561 562 /* analysis phase */ 563 /*----------------*/ 564 mat_mkl_cpardiso->phase = JOB_ANALYSIS; 565 566 cluster_sparse_solver ( 567 mat_mkl_cpardiso->pt, 568 &mat_mkl_cpardiso->maxfct, 569 &mat_mkl_cpardiso->mnum, 570 &mat_mkl_cpardiso->mtype, 571 &mat_mkl_cpardiso->phase, 572 &mat_mkl_cpardiso->n, 573 mat_mkl_cpardiso->a, 574 mat_mkl_cpardiso->ia, 575 mat_mkl_cpardiso->ja, 576 mat_mkl_cpardiso->perm, 577 &mat_mkl_cpardiso->nrhs, 578 mat_mkl_cpardiso->iparm, 579 &mat_mkl_cpardiso->msglvl, 580 NULL, 581 NULL, 582 &mat_mkl_cpardiso->comm_mkl_cpardiso, 583 &mat_mkl_cpardiso->err); 584 585 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 586 587 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 588 F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO; 589 F->ops->solve = MatSolve_MKL_CPARDISO; 590 F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO; 591 F->ops->matsolve = MatMatSolve_MKL_CPARDISO; 592 PetscFunctionReturn(0); 593 } 594 595 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer) 596 { 597 PetscErrorCode ierr; 598 PetscBool iascii; 599 PetscViewerFormat format; 600 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 601 PetscInt i; 602 603 PetscFunctionBegin; 604 /* check if matrix is mkl_cpardiso type */ 605 if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0); 606 607 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 608 if (iascii) { 609 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 610 if (format == PETSC_VIEWER_ASCII_INFO) { 611 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr); 612 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr); 613 for(i = 1; i <= 64; i++){ 614 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr); 615 } 616 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr); 617 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr); 618 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr); 619 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr); 620 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 621 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr); 622 } 623 } 624 PetscFunctionReturn(0); 625 } 626 627 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info) 628 { 629 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data; 630 631 PetscFunctionBegin; 632 info->block_size = 1.0; 633 info->nz_allocated = mat_mkl_cpardiso->nz + 0.0; 634 info->nz_unneeded = 0.0; 635 info->assemblies = 0.0; 636 info->mallocs = 0.0; 637 info->memory = 0.0; 638 info->fill_ratio_given = 0; 639 info->fill_ratio_needed = 0; 640 info->factor_mallocs = 0; 641 PetscFunctionReturn(0); 642 } 643 644 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival) 645 { 646 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data; 647 648 PetscFunctionBegin; 649 if(icntl <= 64){ 650 mat_mkl_cpardiso->iparm[icntl - 1] = ival; 651 } else { 652 if(icntl == 65) mkl_set_num_threads((int)ival); 653 else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival; 654 else if(icntl == 67) mat_mkl_cpardiso->mnum = ival; 655 else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival; 656 else if(icntl == 69){ 657 int pt[IPARM_SIZE]; 658 mat_mkl_cpardiso->mtype = ival; 659 #if defined(PETSC_USE_REAL_SINGLE) 660 mat_mkl_cpardiso->iparm[27] = 1; 661 #else 662 mat_mkl_cpardiso->iparm[27] = 0; 663 #endif 664 mat_mkl_cpardiso->iparm[34] = 1; 665 } 666 } 667 PetscFunctionReturn(0); 668 } 669 670 /*@ 671 MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters 672 673 Logically Collective on Mat 674 675 Input Parameters: 676 + F - the factored matrix obtained by calling MatGetFactor() 677 . icntl - index of Mkl_Pardiso parameter 678 - ival - value of Mkl_Pardiso parameter 679 680 Options Database: 681 . -mat_mkl_cpardiso_<icntl> <ival> 682 683 Level: Intermediate 684 685 Notes: This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options 686 database approach when working with TS, SNES, or KSP. 687 688 References: 689 . Mkl_Pardiso Users' Guide 690 691 .seealso: MatGetFactor() 692 @*/ 693 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 694 { 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } 701 702 static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type) 703 { 704 PetscFunctionBegin; 705 *type = MATSOLVERMKL_CPARDISO; 706 PetscFunctionReturn(0); 707 } 708 709 /* MatGetFactor for MPI AIJ matrices */ 710 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F) 711 { 712 Mat B; 713 PetscErrorCode ierr; 714 Mat_MKL_CPARDISO *mat_mkl_cpardiso; 715 PetscBool isSeqAIJ; 716 717 PetscFunctionBegin; 718 /* Create the factorization matrix */ 719 720 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 721 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 722 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 723 ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr); 724 ierr = MatSetUp(B);CHKERRQ(ierr); 725 726 ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr); 727 728 if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO; 729 else mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO; 730 731 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO; 732 B->ops->destroy = MatDestroy_MKL_CPARDISO; 733 734 B->ops->view = MatView_MKL_CPARDISO; 735 B->ops->getinfo = MatGetInfo_MKL_CPARDISO; 736 737 B->factortype = ftype; 738 B->assembled = PETSC_TRUE; /* required by -ksp_view */ 739 740 B->data = mat_mkl_cpardiso; 741 742 /* set solvertype */ 743 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 744 ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr); 745 746 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);CHKERRQ(ierr); 747 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr); 748 ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr); 749 750 *F = B; 751 PetscFunctionReturn(0); 752 } 753 754 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void) 755 { 756 PetscErrorCode ierr; 757 758 PetscFunctionBegin; 759 ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 760 ierr = MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 761 PetscFunctionReturn(0); 762 } 763