1 #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64) 2 #define MKL_ILP64 3 #endif 4 5 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <math.h> 11 #include <mkl.h> 12 #include <mkl_cluster_sparse_solver.h> 13 14 /* 15 * Possible mkl_cpardiso phases that controls the execution of the solver. 16 * For more information check mkl_cpardiso manual. 17 */ 18 #define JOB_ANALYSIS 11 19 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 20 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 21 #define JOB_NUMERICAL_FACTORIZATION 22 22 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 23 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 24 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 25 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 26 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 27 #define JOB_RELEASE_OF_LU_MEMORY 0 28 #define JOB_RELEASE_OF_ALL_MEMORY -1 29 30 #define IPARM_SIZE 64 31 #define INT_TYPE MKL_INT 32 33 static const char *Err_MSG_CPardiso(int errNo){ 34 switch (errNo) { 35 case -1: 36 return "input inconsistent"; break; 37 case -2: 38 return "not enough memory"; break; 39 case -3: 40 return "reordering problem"; break; 41 case -4: 42 return "zero pivot, numerical factorization or iterative refinement problem"; break; 43 case -5: 44 return "unclassified (internal) error"; break; 45 case -6: 46 return "preordering failed (matrix types 11, 13 only)"; break; 47 case -7: 48 return "diagonal matrix problem"; break; 49 case -8: 50 return "32-bit integer overflow problem"; break; 51 case -9: 52 return "not enough memory for OOC"; break; 53 case -10: 54 return "problems with opening OOC temporary files"; break; 55 case -11: 56 return "read/write problems with the OOC data file"; break; 57 default : 58 return "unknown error"; 59 } 60 } 61 62 /* 63 * Internal data structure. 64 * For more information check mkl_cpardiso manual. 65 */ 66 67 typedef struct { 68 69 /* Configuration vector */ 70 INT_TYPE iparm[IPARM_SIZE]; 71 72 /* 73 * Internal mkl_cpardiso memory location. 74 * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak. 75 */ 76 void *pt[IPARM_SIZE]; 77 78 MPI_Comm comm_mkl_cpardiso; 79 80 /* Basic mkl_cpardiso info*/ 81 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 82 83 /* Matrix structure */ 84 PetscScalar *a; 85 86 INT_TYPE *ia, *ja; 87 88 /* Number of non-zero elements */ 89 INT_TYPE nz; 90 91 /* Row permutaton vector*/ 92 INT_TYPE *perm; 93 94 /* Define is matrix preserve sparce structure. */ 95 MatStructure matstruc; 96 97 PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**); 98 99 /* True if mkl_cpardiso function have been used. */ 100 PetscBool CleanUp; 101 } Mat_MKL_CPARDISO; 102 103 /* 104 * Copy the elements of matrix A. 105 * Input: 106 * - Mat A: MATSEQAIJ matrix 107 * - int shift: matrix index. 108 * - 0 for c representation 109 * - 1 for fortran representation 110 * - MatReuse reuse: 111 * - MAT_INITIAL_MATRIX: Create a new aij representation 112 * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values 113 * Output: 114 * - int *nnz: Number of nonzero-elements. 115 * - int **r pointer to i index 116 * - int **c pointer to j elements 117 * - MATRIXTYPE **v: Non-zero elements 118 */ 119 PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 120 { 121 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 122 123 PetscFunctionBegin; 124 *v=aa->a; 125 if (reuse == MAT_INITIAL_MATRIX) { 126 *r = (INT_TYPE*)aa->i; 127 *c = (INT_TYPE*)aa->j; 128 *nnz = aa->nz; 129 } 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 134 { 135 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 136 PetscErrorCode ierr; 137 PetscInt rstart,nz,i,j,countA,countB; 138 PetscInt *row,*col; 139 const PetscScalar *av, *bv; 140 PetscScalar *val; 141 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 142 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 143 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 144 PetscInt colA_start,jB,jcol; 145 146 PetscFunctionBegin; 147 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 148 av=aa->a; bv=bb->a; 149 150 garray = mat->garray; 151 152 if (reuse == MAT_INITIAL_MATRIX) { 153 nz = aa->nz + bb->nz; 154 *nnz = nz; 155 ierr = PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);CHKERRQ(ierr); 156 col = row + m + 1; 157 val = (PetscScalar*)(col + nz); 158 *r = row; *c = col; *v = val; 159 row[0] = 0; 160 } else { 161 row = *r; col = *c; val = *v; 162 } 163 164 nz = 0; 165 for (i=0; i<m; i++) { 166 row[i] = nz; 167 countA = ai[i+1] - ai[i]; 168 countB = bi[i+1] - bi[i]; 169 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 170 bjj = bj + bi[i]; 171 172 /* B part, smaller col index */ 173 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 174 jB = 0; 175 for (j=0; j<countB; j++) { 176 jcol = garray[bjj[j]]; 177 if (jcol > colA_start) { 178 jB = j; 179 break; 180 } 181 col[nz] = jcol; 182 val[nz++] = *bv++; 183 if (j==countB-1) jB = countB; 184 } 185 186 /* A part */ 187 for (j=0; j<countA; j++) { 188 col[nz] = rstart + ajj[j]; 189 val[nz++] = *av++; 190 } 191 192 /* B part, larger col index */ 193 for (j=jB; j<countB; j++) { 194 col[nz] = garray[bjj[j]]; 195 val[nz++] = *bv++; 196 } 197 } 198 row[m] = nz; 199 200 PetscFunctionReturn(0); 201 } 202 203 /* 204 * Free memory for Mat_MKL_CPARDISO structure and pointers to objects. 205 */ 206 PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A) 207 { 208 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 /* Terminate instance, deallocate memories */ 213 if (mat_mkl_cpardiso->CleanUp) { 214 mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 215 216 cluster_sparse_solver ( 217 mat_mkl_cpardiso->pt, 218 &mat_mkl_cpardiso->maxfct, 219 &mat_mkl_cpardiso->mnum, 220 &mat_mkl_cpardiso->mtype, 221 &mat_mkl_cpardiso->phase, 222 &mat_mkl_cpardiso->n, 223 NULL, 224 NULL, 225 NULL, 226 mat_mkl_cpardiso->perm, 227 &mat_mkl_cpardiso->nrhs, 228 mat_mkl_cpardiso->iparm, 229 &mat_mkl_cpardiso->msglvl, 230 NULL, 231 NULL, 232 &mat_mkl_cpardiso->comm_mkl_cpardiso, 233 (int*)&mat_mkl_cpardiso->err); 234 } 235 236 if (mat_mkl_cpardiso->ConvertToTriples == MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO) { 237 ierr = PetscFree(mat_mkl_cpardiso->ia);CHKERRQ(ierr); 238 } 239 ierr = MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 240 ierr = PetscFree(A->data);CHKERRQ(ierr); 241 242 /* clear composed functions */ 243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 244 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 248 /* 249 * Computes Ax = b 250 */ 251 PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x) 252 { 253 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 254 PetscErrorCode ierr; 255 PetscScalar *xarray; 256 const PetscScalar *barray; 257 258 PetscFunctionBegin; 259 mat_mkl_cpardiso->nrhs = 1; 260 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 261 ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 262 263 /* solve phase */ 264 /*-------------*/ 265 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 266 cluster_sparse_solver ( 267 mat_mkl_cpardiso->pt, 268 &mat_mkl_cpardiso->maxfct, 269 &mat_mkl_cpardiso->mnum, 270 &mat_mkl_cpardiso->mtype, 271 &mat_mkl_cpardiso->phase, 272 &mat_mkl_cpardiso->n, 273 mat_mkl_cpardiso->a, 274 mat_mkl_cpardiso->ia, 275 mat_mkl_cpardiso->ja, 276 mat_mkl_cpardiso->perm, 277 &mat_mkl_cpardiso->nrhs, 278 mat_mkl_cpardiso->iparm, 279 &mat_mkl_cpardiso->msglvl, 280 (void*)barray, 281 (void*)xarray, 282 &mat_mkl_cpardiso->comm_mkl_cpardiso, 283 (int*)&mat_mkl_cpardiso->err); 284 285 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 286 287 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 288 ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 289 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 290 PetscFunctionReturn(0); 291 } 292 293 PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x) 294 { 295 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 296 PetscErrorCode ierr; 297 298 PetscFunctionBegin; 299 #if defined(PETSC_USE_COMPLEX) 300 mat_mkl_cpardiso->iparm[12 - 1] = 1; 301 #else 302 mat_mkl_cpardiso->iparm[12 - 1] = 2; 303 #endif 304 ierr = MatSolve_MKL_CPARDISO(A,b,x);CHKERRQ(ierr); 305 mat_mkl_cpardiso->iparm[12 - 1] = 0; 306 PetscFunctionReturn(0); 307 } 308 309 PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X) 310 { 311 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data; 312 PetscErrorCode ierr; 313 PetscScalar *xarray; 314 const PetscScalar *barray; 315 PetscBool flg; 316 317 PetscFunctionBegin; 318 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);CHKERRQ(ierr); 319 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix"); 320 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);CHKERRQ(ierr); 321 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix"); 322 323 ierr = MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 324 325 if(mat_mkl_cpardiso->nrhs > 0){ 326 ierr = MatDenseGetArrayRead(B,&barray); 327 ierr = MatDenseGetArray(X,&xarray); 328 329 /* solve phase */ 330 /*-------------*/ 331 mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 332 cluster_sparse_solver ( 333 mat_mkl_cpardiso->pt, 334 &mat_mkl_cpardiso->maxfct, 335 &mat_mkl_cpardiso->mnum, 336 &mat_mkl_cpardiso->mtype, 337 &mat_mkl_cpardiso->phase, 338 &mat_mkl_cpardiso->n, 339 mat_mkl_cpardiso->a, 340 mat_mkl_cpardiso->ia, 341 mat_mkl_cpardiso->ja, 342 mat_mkl_cpardiso->perm, 343 &mat_mkl_cpardiso->nrhs, 344 mat_mkl_cpardiso->iparm, 345 &mat_mkl_cpardiso->msglvl, 346 (void*)barray, 347 (void*)xarray, 348 &mat_mkl_cpardiso->comm_mkl_cpardiso, 349 (int*)&mat_mkl_cpardiso->err); 350 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 351 ierr = MatDenseRestoreArrayRead(B,&barray); 352 ierr = MatDenseRestoreArray(X,&xarray); 353 354 } 355 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 356 PetscFunctionReturn(0); 357 358 } 359 360 /* 361 * LU Decomposition 362 */ 363 PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info) 364 { 365 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data; 366 PetscErrorCode ierr; 367 368 PetscFunctionBegin; 369 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 370 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 371 372 mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION; 373 cluster_sparse_solver ( 374 mat_mkl_cpardiso->pt, 375 &mat_mkl_cpardiso->maxfct, 376 &mat_mkl_cpardiso->mnum, 377 &mat_mkl_cpardiso->mtype, 378 &mat_mkl_cpardiso->phase, 379 &mat_mkl_cpardiso->n, 380 mat_mkl_cpardiso->a, 381 mat_mkl_cpardiso->ia, 382 mat_mkl_cpardiso->ja, 383 mat_mkl_cpardiso->perm, 384 &mat_mkl_cpardiso->nrhs, 385 mat_mkl_cpardiso->iparm, 386 &mat_mkl_cpardiso->msglvl, 387 NULL, 388 NULL, 389 &mat_mkl_cpardiso->comm_mkl_cpardiso, 390 &mat_mkl_cpardiso->err); 391 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 392 393 mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 394 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 395 PetscFunctionReturn(0); 396 } 397 398 /* Sets mkl_cpardiso options from the options database */ 399 PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A) 400 { 401 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 402 PetscErrorCode ierr; 403 PetscInt icntl; 404 PetscBool flg; 405 int threads; 406 407 PetscFunctionBegin; 408 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");CHKERRQ(ierr); 409 ierr = PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);CHKERRQ(ierr); 410 if (flg) mkl_set_num_threads(threads); 411 412 ierr = PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);CHKERRQ(ierr); 413 if (flg) mat_mkl_cpardiso->maxfct = icntl; 414 415 ierr = PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);CHKERRQ(ierr); 416 if (flg) mat_mkl_cpardiso->mnum = icntl; 417 418 ierr = PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);CHKERRQ(ierr); 419 if (flg) mat_mkl_cpardiso->msglvl = icntl; 420 421 ierr = PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);CHKERRQ(ierr); 422 if(flg){ 423 mat_mkl_cpardiso->mtype = icntl; 424 #if defined(PETSC_USE_REAL_SINGLE) 425 mat_mkl_cpardiso->iparm[27] = 1; 426 #else 427 mat_mkl_cpardiso->iparm[27] = 0; 428 #endif 429 mat_mkl_cpardiso->iparm[34] = 1; 430 } 431 ierr = PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);CHKERRQ(ierr); 432 433 if(flg && icntl != 0){ 434 ierr = PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);CHKERRQ(ierr); 435 if (flg) mat_mkl_cpardiso->iparm[1] = icntl; 436 437 ierr = PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);CHKERRQ(ierr); 438 if (flg) mat_mkl_cpardiso->iparm[3] = icntl; 439 440 ierr = PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);CHKERRQ(ierr); 441 if (flg) mat_mkl_cpardiso->iparm[4] = icntl; 442 443 ierr = PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);CHKERRQ(ierr); 444 if (flg) mat_mkl_cpardiso->iparm[5] = icntl; 445 446 ierr = PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);CHKERRQ(ierr); 447 if (flg) mat_mkl_cpardiso->iparm[7] = icntl; 448 449 ierr = PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);CHKERRQ(ierr); 450 if (flg) mat_mkl_cpardiso->iparm[9] = icntl; 451 452 ierr = PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);CHKERRQ(ierr); 453 if (flg) mat_mkl_cpardiso->iparm[10] = icntl; 454 455 ierr = PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);CHKERRQ(ierr); 456 if (flg) mat_mkl_cpardiso->iparm[11] = icntl; 457 458 ierr = PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl, 459 &flg);CHKERRQ(ierr); 460 if (flg) mat_mkl_cpardiso->iparm[12] = icntl; 461 462 ierr = PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl, 463 &flg);CHKERRQ(ierr); 464 if (flg) mat_mkl_cpardiso->iparm[17] = icntl; 465 466 ierr = PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);CHKERRQ(ierr); 467 if (flg) mat_mkl_cpardiso->iparm[18] = icntl; 468 469 ierr = PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);CHKERRQ(ierr); 470 if (flg) mat_mkl_cpardiso->iparm[20] = icntl; 471 472 ierr = PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);CHKERRQ(ierr); 473 if (flg) mat_mkl_cpardiso->iparm[23] = icntl; 474 475 ierr = PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);CHKERRQ(ierr); 476 if (flg) mat_mkl_cpardiso->iparm[24] = icntl; 477 478 ierr = PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);CHKERRQ(ierr); 479 if (flg) mat_mkl_cpardiso->iparm[26] = icntl; 480 481 ierr = PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);CHKERRQ(ierr); 482 if (flg) mat_mkl_cpardiso->iparm[30] = icntl; 483 484 ierr = PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);CHKERRQ(ierr); 485 if (flg) mat_mkl_cpardiso->iparm[33] = icntl; 486 487 ierr = PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);CHKERRQ(ierr); 488 if (flg) mat_mkl_cpardiso->iparm[59] = icntl; 489 } 490 491 ierr = PetscOptionsEnd();CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso) 496 { 497 PetscErrorCode ierr; 498 PetscMPIInt size; 499 500 PetscFunctionBegin; 501 502 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));CHKERRQ(ierr); 503 ierr = MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);CHKERRQ(ierr); 504 505 mat_mkl_cpardiso->CleanUp = PETSC_FALSE; 506 mat_mkl_cpardiso->maxfct = 1; 507 mat_mkl_cpardiso->mnum = 1; 508 mat_mkl_cpardiso->n = A->rmap->N; 509 mat_mkl_cpardiso->msglvl = 0; 510 mat_mkl_cpardiso->nrhs = 1; 511 mat_mkl_cpardiso->err = 0; 512 mat_mkl_cpardiso->phase = -1; 513 #if defined(PETSC_USE_COMPLEX) 514 mat_mkl_cpardiso->mtype = 13; 515 #else 516 mat_mkl_cpardiso->mtype = 11; 517 #endif 518 519 #if defined(PETSC_USE_REAL_SINGLE) 520 mat_mkl_cpardiso->iparm[27] = 1; 521 #else 522 mat_mkl_cpardiso->iparm[27] = 0; 523 #endif 524 525 mat_mkl_cpardiso->iparm[34] = 1; /* C style */ 526 527 mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ 528 mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */ 529 mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */ 530 mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */ 531 mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ 532 mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 533 mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 534 mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 535 mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 536 mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */ 537 538 mat_mkl_cpardiso->iparm[39] = 0; 539 if (size > 1) { 540 mat_mkl_cpardiso->iparm[39] = 2; 541 mat_mkl_cpardiso->iparm[40] = A->rmap->rstart; 542 mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1; 543 } 544 mat_mkl_cpardiso->perm = 0; 545 PetscFunctionReturn(0); 546 } 547 548 /* 549 * Symbolic decomposition. Mkl_Pardiso analysis phase. 550 */ 551 PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 552 { 553 Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data; 554 PetscErrorCode ierr; 555 556 PetscFunctionBegin; 557 mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 558 559 /* Set MKL_CPARDISO options from the options database */ 560 ierr = PetscSetMKL_CPARDISOFromOptions(F,A);CHKERRQ(ierr); 561 ierr = (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);CHKERRQ(ierr); 562 563 mat_mkl_cpardiso->n = A->rmap->N; 564 565 /* analysis phase */ 566 /*----------------*/ 567 mat_mkl_cpardiso->phase = JOB_ANALYSIS; 568 569 cluster_sparse_solver ( 570 mat_mkl_cpardiso->pt, 571 &mat_mkl_cpardiso->maxfct, 572 &mat_mkl_cpardiso->mnum, 573 &mat_mkl_cpardiso->mtype, 574 &mat_mkl_cpardiso->phase, 575 &mat_mkl_cpardiso->n, 576 mat_mkl_cpardiso->a, 577 mat_mkl_cpardiso->ia, 578 mat_mkl_cpardiso->ja, 579 mat_mkl_cpardiso->perm, 580 &mat_mkl_cpardiso->nrhs, 581 mat_mkl_cpardiso->iparm, 582 &mat_mkl_cpardiso->msglvl, 583 NULL, 584 NULL, 585 &mat_mkl_cpardiso->comm_mkl_cpardiso, 586 (int*)&mat_mkl_cpardiso->err); 587 588 if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 589 590 mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 591 F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO; 592 F->ops->solve = MatSolve_MKL_CPARDISO; 593 F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO; 594 F->ops->matsolve = MatMatSolve_MKL_CPARDISO; 595 PetscFunctionReturn(0); 596 } 597 598 PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer) 599 { 600 PetscErrorCode ierr; 601 PetscBool iascii; 602 PetscViewerFormat format; 603 Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data; 604 PetscInt i; 605 606 PetscFunctionBegin; 607 /* check if matrix is mkl_cpardiso type */ 608 if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(0); 609 610 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 611 if (iascii) { 612 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 613 if (format == PETSC_VIEWER_ASCII_INFO) { 614 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");CHKERRQ(ierr); 615 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);CHKERRQ(ierr); 616 for(i = 1; i <= 64; i++){ 617 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);CHKERRQ(ierr); 618 } 619 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);CHKERRQ(ierr); 620 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);CHKERRQ(ierr); 621 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);CHKERRQ(ierr); 622 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);CHKERRQ(ierr); 623 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);CHKERRQ(ierr); 624 ierr = PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);CHKERRQ(ierr); 625 } 626 } 627 PetscFunctionReturn(0); 628 } 629 630 PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info) 631 { 632 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->data; 633 634 PetscFunctionBegin; 635 info->block_size = 1.0; 636 info->nz_allocated = mat_mkl_cpardiso->nz + 0.0; 637 info->nz_unneeded = 0.0; 638 info->assemblies = 0.0; 639 info->mallocs = 0.0; 640 info->memory = 0.0; 641 info->fill_ratio_given = 0; 642 info->fill_ratio_needed = 0; 643 info->factor_mallocs = 0; 644 PetscFunctionReturn(0); 645 } 646 647 PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival) 648 { 649 Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->data; 650 651 PetscFunctionBegin; 652 if(icntl <= 64){ 653 mat_mkl_cpardiso->iparm[icntl - 1] = ival; 654 } else { 655 if(icntl == 65) mkl_set_num_threads((int)ival); 656 else if(icntl == 66) mat_mkl_cpardiso->maxfct = ival; 657 else if(icntl == 67) mat_mkl_cpardiso->mnum = ival; 658 else if(icntl == 68) mat_mkl_cpardiso->msglvl = ival; 659 else if(icntl == 69){ 660 mat_mkl_cpardiso->mtype = ival; 661 #if defined(PETSC_USE_REAL_SINGLE) 662 mat_mkl_cpardiso->iparm[27] = 1; 663 #else 664 mat_mkl_cpardiso->iparm[27] = 0; 665 #endif 666 mat_mkl_cpardiso->iparm[34] = 1; 667 } 668 } 669 PetscFunctionReturn(0); 670 } 671 672 /*@ 673 MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters 674 675 Logically Collective on Mat 676 677 Input Parameters: 678 + F - the factored matrix obtained by calling MatGetFactor() 679 . icntl - index of Mkl_Pardiso parameter 680 - ival - value of Mkl_Pardiso parameter 681 682 Options Database: 683 . -mat_mkl_cpardiso_<icntl> <ival> 684 685 Level: Intermediate 686 687 Notes: This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options 688 database approach when working with TS, SNES, or KSP. 689 690 References: 691 . Mkl_Pardiso Users' Guide 692 693 .seealso: MatGetFactor() 694 @*/ 695 PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival) 696 { 697 PetscErrorCode ierr; 698 699 PetscFunctionBegin; 700 ierr = PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type) 705 { 706 PetscFunctionBegin; 707 *type = MATSOLVERMKL_CPARDISO; 708 PetscFunctionReturn(0); 709 } 710 711 /* MatGetFactor for MPI AIJ matrices */ 712 static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F) 713 { 714 Mat B; 715 PetscErrorCode ierr; 716 Mat_MKL_CPARDISO *mat_mkl_cpardiso; 717 PetscBool isSeqAIJ; 718 719 PetscFunctionBegin; 720 /* Create the factorization matrix */ 721 722 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 723 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 724 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 725 ierr = PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);CHKERRQ(ierr); 726 ierr = MatSetUp(B);CHKERRQ(ierr); 727 728 ierr = PetscNewLog(B,&mat_mkl_cpardiso);CHKERRQ(ierr); 729 730 if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO; 731 else mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO; 732 733 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO; 734 B->ops->destroy = MatDestroy_MKL_CPARDISO; 735 736 B->ops->view = MatView_MKL_CPARDISO; 737 B->ops->getinfo = MatGetInfo_MKL_CPARDISO; 738 739 B->factortype = ftype; 740 B->assembled = PETSC_TRUE; /* required by -ksp_view */ 741 742 B->data = mat_mkl_cpardiso; 743 744 /* set solvertype */ 745 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 746 ierr = PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);CHKERRQ(ierr); 747 748 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);CHKERRQ(ierr); 749 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);CHKERRQ(ierr); 750 ierr = PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);CHKERRQ(ierr); 751 752 *F = B; 753 PetscFunctionReturn(0); 754 } 755 756 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void) 757 { 758 PetscErrorCode ierr; 759 760 PetscFunctionBegin; 761 ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 762 ierr = MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765