1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <math.h> 11 #include <mkl.h> 12 #include <mkl_cluster_sparse_solver.h> 13 14 /* 15 * Possible mkl_cpardiso phases that controls the execution of the solver. 16 * For more information check mkl_cpardiso manual. 17 */ 18 #define JOB_ANALYSIS 11 19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 21 #define JOB_NUMERICAL_FACTORIZATION 22 22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 27 #define JOB_RELEASE_OF_LU_MEMORY 0 28 #define JOB_RELEASE_OF_ALL_MEMORY -1 29 30 #define IPARM_SIZE 64 31 #define INT_TYPE MKL_INT 32 33 static const char *Err_MSG_CPardiso(int errNo){ 34 switch (errNo) { 35 case -1: 36 return "input inconsistent"; break; 37 case -2: 38 return "not enough memory"; break; 39 case -3: 40 return "reordering problem"; break; 41 case -4: 42 return "zero pivot, numerical factorization or iterative refinement problem"; break; 43 case -5: 44 return "unclassified (internal) error"; break; 45 case -6: 46 return "preordering failed (matrix types 11, 13 only)"; break; 47 case -7: 48 return "diagonal matrix problem"; break; 49 case -8: 50 return "32-bit integer overflow problem"; break; 51 case -9: 52 return "not enough memory for OOC"; break; 53 case -10: 54 return "problems with opening OOC temporary files"; break; 55 case -11: 56 return "read/write problems with the OOC data file"; break; 57 default : 58 return "unknown error"; 59 } 60 } 61 62 /* 63 * Internal data structure. 64 * For more information check mkl_cpardiso manual. 65 */ 66 67 typedef struct { 68 69 /* Configuration vector */ 70 INT_TYPE iparm[IPARM_SIZE]; 71 72 /* 73 * Internal mkl_cpardiso memory location. 74 * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak. 75 */ 76 void *pt[IPARM_SIZE]; 77 78 MPI_Comm comm_mkl_cpardiso; 79 80 /* Basic mkl_cpardiso info*/ 81 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 82 83 /* Matrix structure */ 84 PetscScalar *a; 85 86 INT_TYPE *ia, *ja; 87 88 /* Number of non-zero elements */ 89 INT_TYPE nz; 90 91 /* Row permutaton vector*/ 92 INT_TYPE *perm; 93 94 /* Define is matrix preserve sparce structure. */ 95 MatStructure matstruc; 96 97 PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**); 98 99 /* True if mkl_cpardiso function have been used. */ 100 PetscBool CleanUp; 101 } Mat_MKL_CPARDISO; 102 103 /* 104 * Copy the elements of matrix A. 105 * Input: 106 * - Mat A: MATSEQAIJ matrix 107 * - int shift: matrix index. 108 * - 0 for c representation 109 * - 1 for fortran representation 110 * - MatReuse reuse: 111 * - MAT_INITIAL_MATRIX: Create a new aij representation 112 * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values 113 * Output: 114 * - int *nnz: Number of nonzero-elements. 115 * - int **r pointer to i index 116 * - int **c pointer to j elements 117 * - MATRIXTYPE **v: Non-zero elements 118 */ 119 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 120 { 121 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 122 123 PetscFunctionBegin; 124 *v=aa->a; 125 if (reuse == MAT_INITIAL_MATRIX) { 126 *r = (INT_TYPE*)aa->i; 127 *c = (INT_TYPE*)aa->j; 128 *nnz = aa->nz; 129 } 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 134 { 135 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 136 PetscErrorCode ierr; 137 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 138 PetscInt *row,*col; 139 const PetscScalar *av, *bv,*v1,*v2; 140 PetscScalar *val; 141 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 142 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 143 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 144 PetscInt nn, colA_start,jB,jcol; 145 146 PetscFunctionBegin; 147 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 148 av=aa->a; bv=bb->a; 149 150 garray = mat->garray; 151 152 if (reuse == MAT_INITIAL_MATRIX) { 153 nz = aa->nz + bb->nz; 154 *nnz = nz; 155 ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr); 156 col = row + m + 1; 157 val = (PetscScalar*)(col + nz); 158 *r = row; *c = col; *v = val; 159 row[0] = 0; 160 } else { 161 row = *r; col = *c; val = *v; 162 } 163 164 nz = 0; 165 for (i=0; i<m; i++) { 166 row[i] = nz; 167 countA = ai[i+1] - ai[i]; 168 countB = bi[i+1] - bi[i]; 169 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 170 bjj = bj + bi[i]; 171 172 /* B part, smaller col index */ 173 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 174 jB = 0; 175 for (j=0; j<countB; j++) { 176 jcol = garray[bjj[j]]; 177 if (jcol > colA_start) { 178 jB = j; 179 break; 180 } 181 col[nz] = jcol; 182 val[nz++] = *bv++; 183 if (j==countB-1) jB = countB; 184 } 185 186 /* A part */ 187 for (j=0; j<countA; j++) { 188 col[nz] = rstart + ajj[j]; 189 val[nz++] = *av++; 190 } 191 192 /* B part, larger col index */ 193 for (j=jB; j<countB; j++) { 194 col[nz] = garray[bjj[j]]; 195 val[nz++] = *bv++; 196 } 197 } 198 row[m] = nz; 199 200 PetscFunctionReturn(0); 201 } 202 203 /* 204 * Free memory for Mat_MKL_CPARDISO structure and pointers to objects. 205 */ 206 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A) 207 { 208 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 /* Terminate instance, deallocate memories */ 213 if (mat_mkl_cpardiso->CleanUp) { 214 mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 215 216 cluster_sparse_solver ( 217 mat_mkl_cpardiso->pt, 218 &mat_mkl_cpardiso->maxfct, 219 &mat_mkl_cpardiso->mnum, 220 &mat_mkl_cpardiso->mtype, 221 &mat_mkl_cpardiso->phase, 222 &mat_mkl_cpardiso->n, 223 NULL, 224 NULL, 225 NULL, 226 mat_mkl_cpardiso->perm, 227 &mat_mkl_cpardiso->nrhs, 228 mat_mkl_cpardiso->iparm, 229 &mat_mkl_cpardiso->msglvl, 230 NULL, 231 NULL, 232 &mat_mkl_cpardiso->comm_mkl_cpardiso, 233 &mat_mkl_cpardiso->err); 234 } 235 236 if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) { 237 ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr); 238 } 239 ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 240 ierr = PetscFree(A->data);CHKERRQ(ierr); 241 242 /* clear composed functions */ 243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 244 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 248 /* 249 * Computes Ax = b 250 */ 251 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x) 252 { 253 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 254 PetscErrorCode ierr; 255 PetscScalar *xarray; 256 const PetscScalar *barray; 257 258 PetscFunctionBegin; 259 mat_mkl_cpardiso->nrhs = 1; 260 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 261 ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 262 263 /* solve phase */ 264 /*-------------*/ 265 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 266 cluster_sparse_solver ( 267 mat_mkl_cpardiso->pt, 268 &mat_mkl_cpardiso->maxfct, 269 &mat_mkl_cpardiso->mnum, 270 &mat_mkl_cpardiso->mtype, 271 &mat_mkl_cpardiso->phase, 272 &mat_mkl_cpardiso->n, 273 mat_mkl_cpardiso->a, 274 mat_mkl_cpardiso->ia, 275 mat_mkl_cpardiso->ja, 276 mat_mkl_cpardiso->perm, 277 &mat_mkl_cpardiso->nrhs, 278 mat_mkl_cpardiso->iparm, 279 &mat_mkl_cpardiso->msglvl, 280 (void*)barray, 281 (void*)xarray, 282 &mat_mkl_cpardiso->comm_mkl_cpardiso, 283 &mat_mkl_cpardiso->err); 284 285 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 286 287 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 288 ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 289 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 290 PetscFunctionReturn(0); 291 } 292 293 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x) 294 { 295 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 296 PetscErrorCode ierr; 297 298 PetscFunctionBegin; 299 #if defined(PETSC_USE_COMPLEX) 300 mat_mkl_cpardiso->iparm[12 - 1] = 1; 301 #else 302 mat_mkl_cpardiso->iparm[12 - 1] = 2; 303 #endif 304 ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr); 305 mat_mkl_cpardiso->iparm[12 - 1] = 0; 306 PetscFunctionReturn(0); 307 } 308 309 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X) 310 { 311 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 312 PetscErrorCode ierr; 313 PetscScalar *xarray; 314 const PetscScalar *barray 315 PetscBool flg; 316 317 PetscFunctionBegin; 318 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr); 319 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix"); 320 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr); 321 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix"); 322 323 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 324 325 if(mat_mkl_cpardiso->nrhs > 0){ 326 ierr = MatDenseGetArrayRead(B,&barray); 327 ierr = MatDenseGetArray(X,&xarray); 328 329 /* solve phase */ 330 /*-------------*/ 331 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 332 cluster_sparse_solver ( 333 mat_mkl_cpardiso->pt, 334 &mat_mkl_cpardiso->maxfct, 335 &mat_mkl_cpardiso->mnum, 336 &mat_mkl_cpardiso->mtype, 337 &mat_mkl_cpardiso->phase, 338 &mat_mkl_cpardiso->n, 339 mat_mkl_cpardiso->a, 340 mat_mkl_cpardiso->ia, 341 mat_mkl_cpardiso->ja, 342 mat_mkl_cpardiso->perm, 343 &mat_mkl_cpardiso->nrhs, 344 mat_mkl_cpardiso->iparm, 345 &mat_mkl_cpardiso->msglvl, 346 (void*)barray, 347 (void*)xarray, 348 &mat_mkl_cpardiso->comm_mkl_cpardiso, 349 &mat_mkl_cpardiso->err); 350 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 351 ierr = MatDenseRestoreArrayRead(B,&barray); 352 ierr = MatDenseRestoreArray(X,&xarray); 353 354 } 355 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 356 PetscFunctionReturn(0); 357 358 } 359 360 /* 361 * LU Decomposition 362 */ 363 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info) 364 { 365 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data; 366 PetscErrorCode ierr; 367 368 PetscFunctionBegin; 369 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 370 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 371 372 mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION; 373 cluster_sparse_solver ( 374 mat_mkl_cpardiso->pt, 375 &mat_mkl_cpardiso->maxfct, 376 &mat_mkl_cpardiso->mnum, 377 &mat_mkl_cpardiso->mtype, 378 &mat_mkl_cpardiso->phase, 379 &mat_mkl_cpardiso->n, 380 mat_mkl_cpardiso->a, 381 mat_mkl_cpardiso->ia, 382 mat_mkl_cpardiso->ja, 383 mat_mkl_cpardiso->perm, 384 &mat_mkl_cpardiso->nrhs, 385 mat_mkl_cpardiso->iparm, 386 &mat_mkl_cpardiso->msglvl, 387 NULL, 388 NULL, 389 &mat_mkl_cpardiso->comm_mkl_cpardiso, 390 &mat_mkl_cpardiso->err); 391 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 392 393 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 394 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 395 PetscFunctionReturn(0); 396 } 397 398 /* Sets mkl_cpardiso options from the options database */ 399 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A) 400 { 401 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 402 PetscErrorCode ierr; 403 PetscInt icntl; 404 PetscBool flg; 405 int pt[IPARM_SIZE], threads; 406 407 PetscFunctionBegin; 408 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr); 409 ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr); 410 if (flg) mkl_set_num_threads(threads); 411 412 ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr); 413 if (flg) mat_mkl_cpardiso->maxfct = icntl; 414 415 ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr); 416 if (flg) mat_mkl_cpardiso->mnum = icntl; 417 418 ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr); 419 if (flg) mat_mkl_cpardiso->msglvl = icntl; 420 421 ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr); 422 if(flg){ 423 mat_mkl_cpardiso->mtype = icntl; 424 #if defined(PETSC_USE_REAL_SINGLE) 425 mat_mkl_cpardiso->iparm[27] = 1; 426 #else 427 mat_mkl_cpardiso->iparm[27] = 0; 428 #endif 429 mat_mkl_cpardiso->iparm[34] = 1; 430 } 431 ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr); 432 433 if(flg && icntl != 0){ 434 ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr); 435 if (flg) mat_mkl_cpardiso->iparm[1] = icntl; 436 437 ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr); 438 if (flg) mat_mkl_cpardiso->iparm[3] = icntl; 439 440 ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr); 441 if (flg) mat_mkl_cpardiso->iparm[4] = icntl; 442 443 ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr); 444 if (flg) mat_mkl_cpardiso->iparm[5] = icntl; 445 446 ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr); 447 if (flg) mat_mkl_cpardiso->iparm[7] = icntl; 448 449 ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr); 450 if (flg) mat_mkl_cpardiso->iparm[9] = icntl; 451 452 ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr); 453 if (flg) mat_mkl_cpardiso->iparm[10] = icntl; 454 455 ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr); 456 if (flg) mat_mkl_cpardiso->iparm[11] = icntl; 457 458 ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl, 459 &flg);CHKERRQ(ierr); 460 if (flg) mat_mkl_cpardiso->iparm[12] = icntl; 461 462 ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl, 463 &flg);CHKERRQ(ierr); 464 if (flg) mat_mkl_cpardiso->iparm[17] = icntl; 465 466 ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr); 467 if (flg) mat_mkl_cpardiso->iparm[18] = icntl; 468 469 ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr); 470 if (flg) mat_mkl_cpardiso->iparm[20] = icntl; 471 472 ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr); 473 if (flg) mat_mkl_cpardiso->iparm[23] = icntl; 474 475 ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr); 476 if (flg) mat_mkl_cpardiso->iparm[24] = icntl; 477 478 ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr); 479 if (flg) mat_mkl_cpardiso->iparm[26] = icntl; 480 481 ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr); 482 if (flg) mat_mkl_cpardiso->iparm[30] = icntl; 483 484 ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr); 485 if (flg) mat_mkl_cpardiso->iparm[33] = icntl; 486 487 ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr); 488 if (flg) mat_mkl_cpardiso->iparm[59] = icntl; 489 } 490 491 ierr = PetscOptionsEnd();CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso) 496 { 497 PetscErrorCode ierr; 498 PetscInt i; 499 PetscMPIInt size; 500 501 PetscFunctionBegin; 502 503 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 504 ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr); 505 506 mat_mkl_cpardiso->CleanUp = PETSC_FALSE; 507 mat_mkl_cpardiso->maxfct = 1; 508 mat_mkl_cpardiso->mnum = 1; 509 mat_mkl_cpardiso->n = A->rmap->N; 510 mat_mkl_cpardiso->msglvl = 0; 511 mat_mkl_cpardiso->nrhs = 1; 512 mat_mkl_cpardiso->err = 0; 513 mat_mkl_cpardiso->phase = -1; 514 #if defined(PETSC_USE_COMPLEX) 515 mat_mkl_cpardiso->mtype = 13; 516 #else 517 mat_mkl_cpardiso->mtype = 11; 518 #endif 519 520 #if defined(PETSC_USE_REAL_SINGLE) 521 mat_mkl_cpardiso->iparm[27] = 1; 522 #else 523 mat_mkl_cpardiso->iparm[27] = 0; 524 #endif 525 526 mat_mkl_cpardiso->iparm[34] = 1; /* C style */ 527 528 mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 529 mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */ 530 mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */ 531 mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */ 532 mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 533 mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 534 mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 535 mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 536 mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 537 mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */ 538 539 mat_mkl_cpardiso->iparm[39] = 0; 540 if (size > 1) { 541 mat_mkl_cpardiso->iparm[39] = 2; 542 mat_mkl_cpardiso->iparm[40] = A->rmap->rstart; 543 mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1; 544 } 545 mat_mkl_cpardiso->perm = 0; 546 PetscFunctionReturn(0); 547 } 548 549 /* 550 * Symbolic decomposition. Mkl_Pardiso analysis phase. 551 */ 552 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 553 { 554 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 555 PetscErrorCode ierr; 556 557 PetscFunctionBegin; 558 mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 559 560 /* Set MKL_CPARDISO options from the options database */ 561 ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr); 562 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 563 564 mat_mkl_cpardiso->n = A->rmap->N; 565 566 /* analysis phase */ 567 /*----------------*/ 568 mat_mkl_cpardiso->phase = JOB_ANALYSIS; 569 570 cluster_sparse_solver ( 571 mat_mkl_cpardiso->pt, 572 &mat_mkl_cpardiso->maxfct, 573 &mat_mkl_cpardiso->mnum, 574 &mat_mkl_cpardiso->mtype, 575 &mat_mkl_cpardiso->phase, 576 &mat_mkl_cpardiso->n, 577 mat_mkl_cpardiso->a, 578 mat_mkl_cpardiso->ia, 579 mat_mkl_cpardiso->ja, 580 mat_mkl_cpardiso->perm, 581 &mat_mkl_cpardiso->nrhs, 582 mat_mkl_cpardiso->iparm, 583 &mat_mkl_cpardiso->msglvl, 584 NULL, 585 NULL, 586 &mat_mkl_cpardiso->comm_mkl_cpardiso, 587 &mat_mkl_cpardiso->err); 588 589 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 590 591 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 592 F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO; 593 F->ops->solve = MatSolve_MKL_CPARDISO; 594 F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO; 595 F->ops->matsolve = MatMatSolve_MKL_CPARDISO; 596 PetscFunctionReturn(0); 597 } 598 599 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer) 600 { 601 PetscErrorCode ierr; 602 PetscBool iascii; 603 PetscViewerFormat format; 604 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 605 PetscInt i; 606 607 PetscFunctionBegin; 608 /* check if matrix is mkl_cpardiso type */ 609 if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0); 610 611 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 612 if (iascii) { 613 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 614 if (format == PETSC_VIEWER_ASCII_INFO) { 615 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr); 616 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr); 617 for(i = 1; i <= 64; i++){ 618 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr); 619 } 620 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr); 621 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr); 622 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr); 623 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr); 624 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 625 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr); 626 } 627 } 628 PetscFunctionReturn(0); 629 } 630 631 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info) 632 { 633 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data; 634 635 PetscFunctionBegin; 636 info->block_size = 1.0; 637 info->nz_allocated = mat_mkl_cpardiso->nz + 0.0; 638 info->nz_unneeded = 0.0; 639 info->assemblies = 0.0; 640 info->mallocs = 0.0; 641 info->memory = 0.0; 642 info->fill_ratio_given = 0; 643 info->fill_ratio_needed = 0; 644 info->factor_mallocs = 0; 645 PetscFunctionReturn(0); 646 } 647 648 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival) 649 { 650 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data; 651 652 PetscFunctionBegin; 653 if(icntl <= 64){ 654 mat_mkl_cpardiso->iparm[icntl - 1] = ival; 655 } else { 656 if(icntl == 65) mkl_set_num_threads((int)ival); 657 else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival; 658 else if(icntl == 67) mat_mkl_cpardiso->mnum = ival; 659 else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival; 660 else if(icntl == 69){ 661 int pt[IPARM_SIZE]; 662 mat_mkl_cpardiso->mtype = ival; 663 #if defined(PETSC_USE_REAL_SINGLE) 664 mat_mkl_cpardiso->iparm[27] = 1; 665 #else 666 mat_mkl_cpardiso->iparm[27] = 0; 667 #endif 668 mat_mkl_cpardiso->iparm[34] = 1; 669 } 670 } 671 PetscFunctionReturn(0); 672 } 673 674 /*@ 675 MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters 676 677 Logically Collective on Mat 678 679 Input Parameters: 680 + F - the factored matrix obtained by calling MatGetFactor() 681 . icntl - index of Mkl_Pardiso parameter 682 - ival - value of Mkl_Pardiso parameter 683 684 Options Database: 685 . -mat_mkl_cpardiso_<icntl> <ival> 686 687 Level: Intermediate 688 689 Notes: This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options 690 database approach when working with TS, SNES, or KSP. 691 692 References: 693 . Mkl_Pardiso Users' Guide 694 695 .seealso: MatGetFactor() 696 @*/ 697 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 698 { 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706 static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type) 707 { 708 PetscFunctionBegin; 709 *type = MATSOLVERMKL_CPARDISO; 710 PetscFunctionReturn(0); 711 } 712 713 /* MatGetFactor for MPI AIJ matrices */ 714 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F) 715 { 716 Mat B; 717 PetscErrorCode ierr; 718 Mat_MKL_CPARDISO *mat_mkl_cpardiso; 719 PetscBool isSeqAIJ; 720 721 PetscFunctionBegin; 722 /* Create the factorization matrix */ 723 724 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 725 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 726 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 727 ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr); 728 ierr = MatSetUp(B);CHKERRQ(ierr); 729 730 ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr); 731 732 if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO; 733 else mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO; 734 735 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO; 736 B->ops->destroy = MatDestroy_MKL_CPARDISO; 737 738 B->ops->view = MatView_MKL_CPARDISO; 739 B->ops->getinfo = MatGetInfo_MKL_CPARDISO; 740 741 B->factortype = ftype; 742 B->assembled = PETSC_TRUE; /* required by -ksp_view */ 743 744 B->data = mat_mkl_cpardiso; 745 746 /* set solvertype */ 747 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 748 ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr); 749 750 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);CHKERRQ(ierr); 751 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr); 752 ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr); 753 754 *F = B; 755 PetscFunctionReturn(0); 756 } 757 758 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void) 759 { 760 PetscErrorCode ierr; 761 762 PetscFunctionBegin; 763 ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 764 ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 765 PetscFunctionReturn(0); 766 } 767