1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <math.h> 11 #include <mkl.h> 12 #include <mkl_cluster_sparse_solver.h> 13 14 /* 15 * Possible mkl_cpardiso phases that controls the execution of the solver. 16 * For more information check mkl_cpardiso manual. 17 */ 18 #define JOB_ANALYSIS 11 19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 21 #define JOB_NUMERICAL_FACTORIZATION 22 22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 27 #define JOB_RELEASE_OF_LU_MEMORY 0 28 #define JOB_RELEASE_OF_ALL_MEMORY -1 29 30 #define IPARM_SIZE 64 31 #define INT_TYPE MKL_INT 32 33 static const char *Err_MSG_CPardiso(int errNo){ 34 switch (errNo) { 35 case -1: 36 return "input inconsistent"; break; 37 case -2: 38 return "not enough memory"; break; 39 case -3: 40 return "reordering problem"; break; 41 case -4: 42 return "zero pivot, numerical factorization or iterative refinement problem"; break; 43 case -5: 44 return "unclassified (internal) error"; break; 45 case -6: 46 return "preordering failed (matrix types 11, 13 only)"; break; 47 case -7: 48 return "diagonal matrix problem"; break; 49 case -8: 50 return "32-bit integer overflow problem"; break; 51 case -9: 52 return "not enough memory for OOC"; break; 53 case -10: 54 return "problems with opening OOC temporary files"; break; 55 case -11: 56 return "read/write problems with the OOC data file"; break; 57 default : 58 return "unknown error"; 59 } 60 } 61 62 /* 63 * Internal data structure. 64 * For more information check mkl_cpardiso manual. 65 */ 66 67 typedef struct { 68 69 /* Configuration vector */ 70 INT_TYPE iparm[IPARM_SIZE]; 71 72 /* 73 * Internal mkl_cpardiso memory location. 74 * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak. 75 */ 76 void *pt[IPARM_SIZE]; 77 78 MPI_Comm comm_mkl_cpardiso; 79 80 /* Basic mkl_cpardiso info*/ 81 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 82 83 /* Matrix structure */ 84 PetscScalar *a; 85 86 INT_TYPE *ia, *ja; 87 88 /* Number of non-zero elements */ 89 INT_TYPE nz; 90 91 /* Row permutaton vector*/ 92 INT_TYPE *perm; 93 94 /* Define is matrix preserve sparce structure. */ 95 MatStructure matstruc; 96 97 PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**); 98 99 /* True if mkl_cpardiso function have been used. */ 100 PetscBool CleanUp; 101 } Mat_MKL_CPARDISO; 102 103 /* 104 * Copy the elements of matrix A. 105 * Input: 106 * - Mat A: MATSEQAIJ matrix 107 * - int shift: matrix index. 108 * - 0 for c representation 109 * - 1 for fortran representation 110 * - MatReuse reuse: 111 * - MAT_INITIAL_MATRIX: Create a new aij representation 112 * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values 113 * Output: 114 * - int *nnz: Number of nonzero-elements. 115 * - int **r pointer to i index 116 * - int **c pointer to j elements 117 * - MATRIXTYPE **v: Non-zero elements 118 */ 119 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 120 { 121 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 122 123 PetscFunctionBegin; 124 *v=aa->a; 125 if (reuse == MAT_INITIAL_MATRIX) { 126 *r = (INT_TYPE*)aa->i; 127 *c = (INT_TYPE*)aa->j; 128 *nnz = aa->nz; 129 } 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 134 { 135 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 136 PetscErrorCode ierr; 137 PetscInt rstart,nz,i,j,countA,countB; 138 PetscInt *row,*col; 139 const PetscScalar *av, *bv; 140 PetscScalar *val; 141 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 142 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 143 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 144 PetscInt colA_start,jB,jcol; 145 146 PetscFunctionBegin; 147 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 148 av=aa->a; bv=bb->a; 149 150 garray = mat->garray; 151 152 if (reuse == MAT_INITIAL_MATRIX) { 153 nz = aa->nz + bb->nz; 154 *nnz = nz; 155 ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr); 156 col = row + m + 1; 157 val = (PetscScalar*)(col + nz); 158 *r = row; *c = col; *v = val; 159 row[0] = 0; 160 } else { 161 row = *r; col = *c; val = *v; 162 } 163 164 nz = 0; 165 for (i=0; i<m; i++) { 166 row[i] = nz; 167 countA = ai[i+1] - ai[i]; 168 countB = bi[i+1] - bi[i]; 169 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 170 bjj = bj + bi[i]; 171 172 /* B part, smaller col index */ 173 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 174 jB = 0; 175 for (j=0; j<countB; j++) { 176 jcol = garray[bjj[j]]; 177 if (jcol > colA_start) { 178 jB = j; 179 break; 180 } 181 col[nz] = jcol; 182 val[nz++] = *bv++; 183 if (j==countB-1) jB = countB; 184 } 185 186 /* A part */ 187 for (j=0; j<countA; j++) { 188 col[nz] = rstart + ajj[j]; 189 val[nz++] = *av++; 190 } 191 192 /* B part, larger col index */ 193 for (j=jB; j<countB; j++) { 194 col[nz] = garray[bjj[j]]; 195 val[nz++] = *bv++; 196 } 197 } 198 row[m] = nz; 199 200 PetscFunctionReturn(0); 201 } 202 203 /* 204 * Free memory for Mat_MKL_CPARDISO structure and pointers to objects. 205 */ 206 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A) 207 { 208 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 /* Terminate instance, deallocate memories */ 213 if (mat_mkl_cpardiso->CleanUp) { 214 mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 215 216 cluster_sparse_solver ( 217 mat_mkl_cpardiso->pt, 218 &mat_mkl_cpardiso->maxfct, 219 &mat_mkl_cpardiso->mnum, 220 &mat_mkl_cpardiso->mtype, 221 &mat_mkl_cpardiso->phase, 222 &mat_mkl_cpardiso->n, 223 NULL, 224 NULL, 225 NULL, 226 mat_mkl_cpardiso->perm, 227 &mat_mkl_cpardiso->nrhs, 228 mat_mkl_cpardiso->iparm, 229 &mat_mkl_cpardiso->msglvl, 230 NULL, 231 NULL, 232 &mat_mkl_cpardiso->comm_mkl_cpardiso, 233 (int*)&mat_mkl_cpardiso->err); 234 } 235 236 if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) { 237 ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr); 238 } 239 ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 240 ierr = PetscFree(A->data);CHKERRQ(ierr); 241 242 /* clear composed functions */ 243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 244 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 248 /* 249 * Computes Ax = b 250 */ 251 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x) 252 { 253 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 254 PetscErrorCode ierr; 255 PetscScalar *xarray; 256 const PetscScalar *barray; 257 258 PetscFunctionBegin; 259 mat_mkl_cpardiso->nrhs = 1; 260 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 261 ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 262 263 /* solve phase */ 264 /*-------------*/ 265 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 266 cluster_sparse_solver ( 267 mat_mkl_cpardiso->pt, 268 &mat_mkl_cpardiso->maxfct, 269 &mat_mkl_cpardiso->mnum, 270 &mat_mkl_cpardiso->mtype, 271 &mat_mkl_cpardiso->phase, 272 &mat_mkl_cpardiso->n, 273 mat_mkl_cpardiso->a, 274 mat_mkl_cpardiso->ia, 275 mat_mkl_cpardiso->ja, 276 mat_mkl_cpardiso->perm, 277 &mat_mkl_cpardiso->nrhs, 278 mat_mkl_cpardiso->iparm, 279 &mat_mkl_cpardiso->msglvl, 280 (void*)barray, 281 (void*)xarray, 282 &mat_mkl_cpardiso->comm_mkl_cpardiso, 283 (int*)&mat_mkl_cpardiso->err); 284 285 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 286 287 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 288 ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 289 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 290 PetscFunctionReturn(0); 291 } 292 293 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x) 294 { 295 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 296 PetscErrorCode ierr; 297 298 PetscFunctionBegin; 299 #if defined(PETSC_USE_COMPLEX) 300 mat_mkl_cpardiso->iparm[12 - 1] = 1; 301 #else 302 mat_mkl_cpardiso->iparm[12 - 1] = 2; 303 #endif 304 ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr); 305 mat_mkl_cpardiso->iparm[12 - 1] = 0; 306 PetscFunctionReturn(0); 307 } 308 309 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X) 310 { 311 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 312 PetscErrorCode ierr; 313 PetscScalar *xarray; 314 const PetscScalar *barray; 315 316 PetscFunctionBegin; 317 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 318 319 if(mat_mkl_cpardiso->nrhs > 0){ 320 ierr = MatDenseGetArrayRead(B,&barray); 321 ierr = MatDenseGetArray(X,&xarray); 322 323 /* solve phase */ 324 /*-------------*/ 325 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 326 cluster_sparse_solver ( 327 mat_mkl_cpardiso->pt, 328 &mat_mkl_cpardiso->maxfct, 329 &mat_mkl_cpardiso->mnum, 330 &mat_mkl_cpardiso->mtype, 331 &mat_mkl_cpardiso->phase, 332 &mat_mkl_cpardiso->n, 333 mat_mkl_cpardiso->a, 334 mat_mkl_cpardiso->ia, 335 mat_mkl_cpardiso->ja, 336 mat_mkl_cpardiso->perm, 337 &mat_mkl_cpardiso->nrhs, 338 mat_mkl_cpardiso->iparm, 339 &mat_mkl_cpardiso->msglvl, 340 (void*)barray, 341 (void*)xarray, 342 &mat_mkl_cpardiso->comm_mkl_cpardiso, 343 (int*)&mat_mkl_cpardiso->err); 344 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 345 ierr = MatDenseRestoreArrayRead(B,&barray); 346 ierr = MatDenseRestoreArray(X,&xarray); 347 348 } 349 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 350 PetscFunctionReturn(0); 351 352 } 353 354 /* 355 * LU Decomposition 356 */ 357 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info) 358 { 359 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data; 360 PetscErrorCode ierr; 361 362 PetscFunctionBegin; 363 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 364 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 365 366 mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION; 367 cluster_sparse_solver ( 368 mat_mkl_cpardiso->pt, 369 &mat_mkl_cpardiso->maxfct, 370 &mat_mkl_cpardiso->mnum, 371 &mat_mkl_cpardiso->mtype, 372 &mat_mkl_cpardiso->phase, 373 &mat_mkl_cpardiso->n, 374 mat_mkl_cpardiso->a, 375 mat_mkl_cpardiso->ia, 376 mat_mkl_cpardiso->ja, 377 mat_mkl_cpardiso->perm, 378 &mat_mkl_cpardiso->nrhs, 379 mat_mkl_cpardiso->iparm, 380 &mat_mkl_cpardiso->msglvl, 381 NULL, 382 NULL, 383 &mat_mkl_cpardiso->comm_mkl_cpardiso, 384 &mat_mkl_cpardiso->err); 385 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 386 387 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 388 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 389 PetscFunctionReturn(0); 390 } 391 392 /* Sets mkl_cpardiso options from the options database */ 393 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A) 394 { 395 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 396 PetscErrorCode ierr; 397 PetscInt icntl; 398 PetscBool flg; 399 int threads; 400 401 PetscFunctionBegin; 402 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr); 403 ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr); 404 if (flg) mkl_set_num_threads(threads); 405 406 ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr); 407 if (flg) mat_mkl_cpardiso->maxfct = icntl; 408 409 ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr); 410 if (flg) mat_mkl_cpardiso->mnum = icntl; 411 412 ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr); 413 if (flg) mat_mkl_cpardiso->msglvl = icntl; 414 415 ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr); 416 if(flg){ 417 mat_mkl_cpardiso->mtype = icntl; 418 #if defined(PETSC_USE_REAL_SINGLE) 419 mat_mkl_cpardiso->iparm[27] = 1; 420 #else 421 mat_mkl_cpardiso->iparm[27] = 0; 422 #endif 423 mat_mkl_cpardiso->iparm[34] = 1; 424 } 425 ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr); 426 427 if(flg && icntl != 0){ 428 ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr); 429 if (flg) mat_mkl_cpardiso->iparm[1] = icntl; 430 431 ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr); 432 if (flg) mat_mkl_cpardiso->iparm[3] = icntl; 433 434 ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr); 435 if (flg) mat_mkl_cpardiso->iparm[4] = icntl; 436 437 ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr); 438 if (flg) mat_mkl_cpardiso->iparm[5] = icntl; 439 440 ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr); 441 if (flg) mat_mkl_cpardiso->iparm[7] = icntl; 442 443 ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr); 444 if (flg) mat_mkl_cpardiso->iparm[9] = icntl; 445 446 ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr); 447 if (flg) mat_mkl_cpardiso->iparm[10] = icntl; 448 449 ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr); 450 if (flg) mat_mkl_cpardiso->iparm[11] = icntl; 451 452 ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl, 453 &flg);CHKERRQ(ierr); 454 if (flg) mat_mkl_cpardiso->iparm[12] = icntl; 455 456 ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl, 457 &flg);CHKERRQ(ierr); 458 if (flg) mat_mkl_cpardiso->iparm[17] = icntl; 459 460 ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr); 461 if (flg) mat_mkl_cpardiso->iparm[18] = icntl; 462 463 ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr); 464 if (flg) mat_mkl_cpardiso->iparm[20] = icntl; 465 466 ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr); 467 if (flg) mat_mkl_cpardiso->iparm[23] = icntl; 468 469 ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr); 470 if (flg) mat_mkl_cpardiso->iparm[24] = icntl; 471 472 ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr); 473 if (flg) mat_mkl_cpardiso->iparm[26] = icntl; 474 475 ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr); 476 if (flg) mat_mkl_cpardiso->iparm[30] = icntl; 477 478 ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr); 479 if (flg) mat_mkl_cpardiso->iparm[33] = icntl; 480 481 ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr); 482 if (flg) mat_mkl_cpardiso->iparm[59] = icntl; 483 } 484 485 ierr = PetscOptionsEnd();CHKERRQ(ierr); 486 PetscFunctionReturn(0); 487 } 488 489 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso) 490 { 491 PetscErrorCode ierr; 492 PetscMPIInt size; 493 494 PetscFunctionBegin; 495 496 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 497 ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr); 498 499 mat_mkl_cpardiso->CleanUp = PETSC_FALSE; 500 mat_mkl_cpardiso->maxfct = 1; 501 mat_mkl_cpardiso->mnum = 1; 502 mat_mkl_cpardiso->n = A->rmap->N; 503 mat_mkl_cpardiso->msglvl = 0; 504 mat_mkl_cpardiso->nrhs = 1; 505 mat_mkl_cpardiso->err = 0; 506 mat_mkl_cpardiso->phase = -1; 507 #if defined(PETSC_USE_COMPLEX) 508 mat_mkl_cpardiso->mtype = 13; 509 #else 510 mat_mkl_cpardiso->mtype = 11; 511 #endif 512 513 #if defined(PETSC_USE_REAL_SINGLE) 514 mat_mkl_cpardiso->iparm[27] = 1; 515 #else 516 mat_mkl_cpardiso->iparm[27] = 0; 517 #endif 518 519 mat_mkl_cpardiso->iparm[34] = 1; /* C style */ 520 521 mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 522 mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */ 523 mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */ 524 mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */ 525 mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 526 mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 527 mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 528 mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 529 mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 530 mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */ 531 532 mat_mkl_cpardiso->iparm[39] = 0; 533 if (size > 1) { 534 mat_mkl_cpardiso->iparm[39] = 2; 535 mat_mkl_cpardiso->iparm[40] = A->rmap->rstart; 536 mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1; 537 } 538 mat_mkl_cpardiso->perm = 0; 539 PetscFunctionReturn(0); 540 } 541 542 /* 543 * Symbolic decomposition. Mkl_Pardiso analysis phase. 544 */ 545 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 546 { 547 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 548 PetscErrorCode ierr; 549 550 PetscFunctionBegin; 551 mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 552 553 /* Set MKL_CPARDISO options from the options database */ 554 ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr); 555 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 556 557 mat_mkl_cpardiso->n = A->rmap->N; 558 559 /* analysis phase */ 560 /*----------------*/ 561 mat_mkl_cpardiso->phase = JOB_ANALYSIS; 562 563 cluster_sparse_solver ( 564 mat_mkl_cpardiso->pt, 565 &mat_mkl_cpardiso->maxfct, 566 &mat_mkl_cpardiso->mnum, 567 &mat_mkl_cpardiso->mtype, 568 &mat_mkl_cpardiso->phase, 569 &mat_mkl_cpardiso->n, 570 mat_mkl_cpardiso->a, 571 mat_mkl_cpardiso->ia, 572 mat_mkl_cpardiso->ja, 573 mat_mkl_cpardiso->perm, 574 &mat_mkl_cpardiso->nrhs, 575 mat_mkl_cpardiso->iparm, 576 &mat_mkl_cpardiso->msglvl, 577 NULL, 578 NULL, 579 &mat_mkl_cpardiso->comm_mkl_cpardiso, 580 (int*)&mat_mkl_cpardiso->err); 581 582 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 583 584 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 585 F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO; 586 F->ops->solve = MatSolve_MKL_CPARDISO; 587 F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO; 588 F->ops->matsolve = MatMatSolve_MKL_CPARDISO; 589 PetscFunctionReturn(0); 590 } 591 592 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer) 593 { 594 PetscErrorCode ierr; 595 PetscBool iascii; 596 PetscViewerFormat format; 597 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 598 PetscInt i; 599 600 PetscFunctionBegin; 601 /* check if matrix is mkl_cpardiso type */ 602 if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0); 603 604 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 605 if (iascii) { 606 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 607 if (format == PETSC_VIEWER_ASCII_INFO) { 608 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr); 609 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr); 610 for(i = 1; i <= 64; i++){ 611 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr); 612 } 613 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr); 614 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr); 615 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr); 616 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr); 617 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 618 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr); 619 } 620 } 621 PetscFunctionReturn(0); 622 } 623 624 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info) 625 { 626 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data; 627 628 PetscFunctionBegin; 629 info->block_size = 1.0; 630 info->nz_allocated = mat_mkl_cpardiso->nz + 0.0; 631 info->nz_unneeded = 0.0; 632 info->assemblies = 0.0; 633 info->mallocs = 0.0; 634 info->memory = 0.0; 635 info->fill_ratio_given = 0; 636 info->fill_ratio_needed = 0; 637 info->factor_mallocs = 0; 638 PetscFunctionReturn(0); 639 } 640 641 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival) 642 { 643 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data; 644 645 PetscFunctionBegin; 646 if(icntl <= 64){ 647 mat_mkl_cpardiso->iparm[icntl - 1] = ival; 648 } else { 649 if(icntl == 65) mkl_set_num_threads((int)ival); 650 else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival; 651 else if(icntl == 67) mat_mkl_cpardiso->mnum = ival; 652 else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival; 653 else if(icntl == 69){ 654 mat_mkl_cpardiso->mtype = ival; 655 #if defined(PETSC_USE_REAL_SINGLE) 656 mat_mkl_cpardiso->iparm[27] = 1; 657 #else 658 mat_mkl_cpardiso->iparm[27] = 0; 659 #endif 660 mat_mkl_cpardiso->iparm[34] = 1; 661 } 662 } 663 PetscFunctionReturn(0); 664 } 665 666 /*@ 667 MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters 668 669 Logically Collective on Mat 670 671 Input Parameters: 672 + F - the factored matrix obtained by calling MatGetFactor() 673 . icntl - index of Mkl_Pardiso parameter 674 - ival - value of Mkl_Pardiso parameter 675 676 Options Database: 677 . -mat_mkl_cpardiso_<icntl> <ival> 678 679 Level: Intermediate 680 681 Notes: This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options 682 database approach when working with TS, SNES, or KSP. 683 684 References: 685 . Mkl_Pardiso Users' Guide 686 687 .seealso: MatGetFactor() 688 @*/ 689 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 690 { 691 PetscErrorCode ierr; 692 693 PetscFunctionBegin; 694 ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 695 PetscFunctionReturn(0); 696 } 697 698 static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type) 699 { 700 PetscFunctionBegin; 701 *type = MATSOLVERMKL_CPARDISO; 702 PetscFunctionReturn(0); 703 } 704 705 /* MatGetFactor for MPI AIJ matrices */ 706 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F) 707 { 708 Mat B; 709 PetscErrorCode ierr; 710 Mat_MKL_CPARDISO *mat_mkl_cpardiso; 711 PetscBool isSeqAIJ; 712 713 PetscFunctionBegin; 714 /* Create the factorization matrix */ 715 716 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 717 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 718 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 719 ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr); 720 ierr = MatSetUp(B);CHKERRQ(ierr); 721 722 ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr); 723 724 if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO; 725 else mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO; 726 727 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO; 728 B->ops->destroy = MatDestroy_MKL_CPARDISO; 729 730 B->ops->view = MatView_MKL_CPARDISO; 731 B->ops->getinfo = MatGetInfo_MKL_CPARDISO; 732 733 B->factortype = ftype; 734 B->assembled = PETSC_TRUE; /* required by -ksp_view */ 735 736 B->data = mat_mkl_cpardiso; 737 738 /* set solvertype */ 739 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 740 ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr); 741 742 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);CHKERRQ(ierr); 743 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr); 744 ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr); 745 746 *F = B; 747 PetscFunctionReturn(0); 748 } 749 750 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void) 751 { 752 PetscErrorCode ierr; 753 754 PetscFunctionBegin; 755 ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 756 ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 757 PetscFunctionReturn(0); 758 } 759