151d8ba46SPierre Jolivet #include <petscsys.h> 251d8ba46SPierre Jolivet #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 351d8ba46SPierre Jolivet #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4d305a81bSVasiliy Kozyrev 52475b7caSBarry Smith #if defined(PETSC_HAVE_MKL_INTEL_ILP64) 669ab9685SBarry Smith #define MKL_ILP64 769ab9685SBarry Smith #endif 8d305a81bSVasiliy Kozyrev #include <mkl.h> 9d305a81bSVasiliy Kozyrev #include <mkl_cluster_sparse_solver.h> 10d305a81bSVasiliy Kozyrev 11d305a81bSVasiliy Kozyrev /* 12d305a81bSVasiliy Kozyrev * Possible mkl_cpardiso phases that controls the execution of the solver. 13d305a81bSVasiliy Kozyrev * For more information check mkl_cpardiso manual. 14d305a81bSVasiliy Kozyrev */ 15d305a81bSVasiliy Kozyrev #define JOB_ANALYSIS 11 16db1f124cSVasiliy Kozyrev #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 17db1f124cSVasiliy Kozyrev #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 18d305a81bSVasiliy Kozyrev #define JOB_NUMERICAL_FACTORIZATION 22 19db1f124cSVasiliy Kozyrev #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 20d305a81bSVasiliy Kozyrev #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 21db1f124cSVasiliy Kozyrev #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 22db1f124cSVasiliy Kozyrev #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 23db1f124cSVasiliy Kozyrev #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 24db1f124cSVasiliy Kozyrev #define JOB_RELEASE_OF_LU_MEMORY 0 25d305a81bSVasiliy Kozyrev #define JOB_RELEASE_OF_ALL_MEMORY -1 26d305a81bSVasiliy Kozyrev 27d305a81bSVasiliy Kozyrev #define IPARM_SIZE 64 28d305a81bSVasiliy Kozyrev #define INT_TYPE MKL_INT 29d305a81bSVasiliy Kozyrev 30d71ae5a4SJacob Faibussowitsch static const char *Err_MSG_CPardiso(int errNo) 31d71ae5a4SJacob Faibussowitsch { 32d305a81bSVasiliy Kozyrev switch (errNo) { 33d71ae5a4SJacob Faibussowitsch case -1: 34d71ae5a4SJacob Faibussowitsch return "input inconsistent"; 35d71ae5a4SJacob Faibussowitsch break; 36d71ae5a4SJacob Faibussowitsch case -2: 37d71ae5a4SJacob Faibussowitsch return "not enough memory"; 38d71ae5a4SJacob Faibussowitsch break; 39d71ae5a4SJacob Faibussowitsch case -3: 40d71ae5a4SJacob Faibussowitsch return "reordering problem"; 41d71ae5a4SJacob Faibussowitsch break; 42d71ae5a4SJacob Faibussowitsch case -4: 43d71ae5a4SJacob Faibussowitsch return "zero pivot, numerical factorization or iterative refinement problem"; 44d71ae5a4SJacob Faibussowitsch break; 45d71ae5a4SJacob Faibussowitsch case -5: 46d71ae5a4SJacob Faibussowitsch return "unclassified (internal) error"; 47d71ae5a4SJacob Faibussowitsch break; 48d71ae5a4SJacob Faibussowitsch case -6: 49d71ae5a4SJacob Faibussowitsch return "preordering failed (matrix types 11, 13 only)"; 50d71ae5a4SJacob Faibussowitsch break; 51d71ae5a4SJacob Faibussowitsch case -7: 52d71ae5a4SJacob Faibussowitsch return "diagonal matrix problem"; 53d71ae5a4SJacob Faibussowitsch break; 54d71ae5a4SJacob Faibussowitsch case -8: 55d71ae5a4SJacob Faibussowitsch return "32-bit integer overflow problem"; 56d71ae5a4SJacob Faibussowitsch break; 57d71ae5a4SJacob Faibussowitsch case -9: 58d71ae5a4SJacob Faibussowitsch return "not enough memory for OOC"; 59d71ae5a4SJacob Faibussowitsch break; 60d71ae5a4SJacob Faibussowitsch case -10: 61d71ae5a4SJacob Faibussowitsch return "problems with opening OOC temporary files"; 62d71ae5a4SJacob Faibussowitsch break; 63d71ae5a4SJacob Faibussowitsch case -11: 64d71ae5a4SJacob Faibussowitsch return "read/write problems with the OOC data file"; 65d71ae5a4SJacob Faibussowitsch break; 66d71ae5a4SJacob Faibussowitsch default: 67d71ae5a4SJacob Faibussowitsch return "unknown error"; 68d305a81bSVasiliy Kozyrev } 69d305a81bSVasiliy Kozyrev } 70d305a81bSVasiliy Kozyrev 71aaaa354bSBarry Smith #define PetscCallCluster(f) PetscStackCallExternalVoid("cluster_sparse_solver", f); 72aaaa354bSBarry Smith 73d305a81bSVasiliy Kozyrev /* 74d305a81bSVasiliy Kozyrev * Internal data structure. 75d305a81bSVasiliy Kozyrev * For more information check mkl_cpardiso manual. 76d305a81bSVasiliy Kozyrev */ 77d305a81bSVasiliy Kozyrev 78d305a81bSVasiliy Kozyrev typedef struct { 79d305a81bSVasiliy Kozyrev /* Configuration vector */ 80d305a81bSVasiliy Kozyrev INT_TYPE iparm[IPARM_SIZE]; 81d305a81bSVasiliy Kozyrev 82d305a81bSVasiliy Kozyrev /* 83d305a81bSVasiliy Kozyrev * Internal mkl_cpardiso memory location. 84d305a81bSVasiliy Kozyrev * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak. 85d305a81bSVasiliy Kozyrev */ 86d305a81bSVasiliy Kozyrev void *pt[IPARM_SIZE]; 87d305a81bSVasiliy Kozyrev 88ae02b5cfSPierre Jolivet MPI_Fint comm_mkl_cpardiso; 89d305a81bSVasiliy Kozyrev 90d305a81bSVasiliy Kozyrev /* Basic mkl_cpardiso info*/ 91d305a81bSVasiliy Kozyrev INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 92d305a81bSVasiliy Kozyrev 930b4b7b1cSBarry Smith /* Matrix values and matrix nonzero structure */ 94d305a81bSVasiliy Kozyrev PetscScalar *a; 95d305a81bSVasiliy Kozyrev 96d305a81bSVasiliy Kozyrev INT_TYPE *ia, *ja; 97d305a81bSVasiliy Kozyrev 98d305a81bSVasiliy Kozyrev /* Number of non-zero elements */ 99d305a81bSVasiliy Kozyrev INT_TYPE nz; 100d305a81bSVasiliy Kozyrev 101d305a81bSVasiliy Kozyrev /* Row permutaton vector*/ 102d305a81bSVasiliy Kozyrev INT_TYPE *perm; 103d305a81bSVasiliy Kozyrev 104a678f235SPierre Jolivet /* Define is matrix preserve sparse structure. */ 105d305a81bSVasiliy Kozyrev MatStructure matstruc; 106d305a81bSVasiliy Kozyrev 107d305a81bSVasiliy Kozyrev PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt *, PetscInt **, PetscInt **, PetscScalar **); 108d305a81bSVasiliy Kozyrev 109d305a81bSVasiliy Kozyrev /* True if mkl_cpardiso function have been used. */ 110d305a81bSVasiliy Kozyrev PetscBool CleanUp; 111d305a81bSVasiliy Kozyrev } Mat_MKL_CPARDISO; 112d305a81bSVasiliy Kozyrev 113d305a81bSVasiliy Kozyrev /* 114d305a81bSVasiliy Kozyrev * Copy the elements of matrix A. 115d305a81bSVasiliy Kozyrev * Input: 116d305a81bSVasiliy Kozyrev * - Mat A: MATSEQAIJ matrix 117d305a81bSVasiliy Kozyrev * - int shift: matrix index. 118d305a81bSVasiliy Kozyrev * - 0 for c representation 119d305a81bSVasiliy Kozyrev * - 1 for fortran representation 120d305a81bSVasiliy Kozyrev * - MatReuse reuse: 121d305a81bSVasiliy Kozyrev * - MAT_INITIAL_MATRIX: Create a new aij representation 122d305a81bSVasiliy Kozyrev * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values 123d305a81bSVasiliy Kozyrev * Output: 124d305a81bSVasiliy Kozyrev * - int *nnz: Number of nonzero-elements. 125d305a81bSVasiliy Kozyrev * - int **r pointer to i index 126d305a81bSVasiliy Kozyrev * - int **c pointer to j elements 127d305a81bSVasiliy Kozyrev * - MATRIXTYPE **v: Non-zero elements 128d305a81bSVasiliy Kozyrev */ 12966976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 130d71ae5a4SJacob Faibussowitsch { 131d305a81bSVasiliy Kozyrev Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 1322b26979fSBarry Smith 133d305a81bSVasiliy Kozyrev PetscFunctionBegin; 134d305a81bSVasiliy Kozyrev *v = aa->a; 135d305a81bSVasiliy Kozyrev if (reuse == MAT_INITIAL_MATRIX) { 136d305a81bSVasiliy Kozyrev *r = (INT_TYPE *)aa->i; 137d305a81bSVasiliy Kozyrev *c = (INT_TYPE *)aa->j; 138d305a81bSVasiliy Kozyrev *nnz = aa->nz; 139d305a81bSVasiliy Kozyrev } 1403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 141d305a81bSVasiliy Kozyrev } 142d305a81bSVasiliy Kozyrev 14366976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 144d71ae5a4SJacob Faibussowitsch { 145d305a81bSVasiliy Kozyrev const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 146566a9afdSBarry Smith PetscInt rstart, nz, i, j, countA, countB; 147d305a81bSVasiliy Kozyrev PetscInt *row, *col; 148566a9afdSBarry Smith const PetscScalar *av, *bv; 149d305a81bSVasiliy Kozyrev PetscScalar *val; 150d305a81bSVasiliy Kozyrev Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 151f4f49eeaSPierre Jolivet Mat_SeqAIJ *aa = (Mat_SeqAIJ *)mat->A->data; 152f4f49eeaSPierre Jolivet Mat_SeqAIJ *bb = (Mat_SeqAIJ *)mat->B->data; 153566a9afdSBarry Smith PetscInt colA_start, jB, jcol; 154d305a81bSVasiliy Kozyrev 155d305a81bSVasiliy Kozyrev PetscFunctionBegin; 1569371c9d4SSatish Balay ai = aa->i; 1579371c9d4SSatish Balay aj = aa->j; 1589371c9d4SSatish Balay bi = bb->i; 1599371c9d4SSatish Balay bj = bb->j; 1609371c9d4SSatish Balay rstart = A->rmap->rstart; 1619371c9d4SSatish Balay av = aa->a; 1629371c9d4SSatish Balay bv = bb->a; 163d305a81bSVasiliy Kozyrev 164d305a81bSVasiliy Kozyrev garray = mat->garray; 165d305a81bSVasiliy Kozyrev 166d305a81bSVasiliy Kozyrev if (reuse == MAT_INITIAL_MATRIX) { 167d305a81bSVasiliy Kozyrev nz = aa->nz + bb->nz; 168d305a81bSVasiliy Kozyrev *nnz = nz; 1699566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(m + 1, &row, nz, &col, nz, &val)); 1709371c9d4SSatish Balay *r = row; 1719371c9d4SSatish Balay *c = col; 1729371c9d4SSatish Balay *v = val; 173d305a81bSVasiliy Kozyrev } else { 1749371c9d4SSatish Balay row = *r; 1759371c9d4SSatish Balay col = *c; 1769371c9d4SSatish Balay val = *v; 177d305a81bSVasiliy Kozyrev } 178d305a81bSVasiliy Kozyrev 179d305a81bSVasiliy Kozyrev nz = 0; 180d305a81bSVasiliy Kozyrev for (i = 0; i < m; i++) { 181d305a81bSVasiliy Kozyrev row[i] = nz; 182d305a81bSVasiliy Kozyrev countA = ai[i + 1] - ai[i]; 183d305a81bSVasiliy Kozyrev countB = bi[i + 1] - bi[i]; 184d305a81bSVasiliy Kozyrev ajj = aj + ai[i]; /* ptr to the beginning of this row */ 185d305a81bSVasiliy Kozyrev bjj = bj + bi[i]; 186d305a81bSVasiliy Kozyrev 187d305a81bSVasiliy Kozyrev /* B part, smaller col index */ 188d305a81bSVasiliy Kozyrev colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 189d305a81bSVasiliy Kozyrev jB = 0; 190d305a81bSVasiliy Kozyrev for (j = 0; j < countB; j++) { 191d305a81bSVasiliy Kozyrev jcol = garray[bjj[j]]; 19251d8ba46SPierre Jolivet if (jcol > colA_start) break; 193d305a81bSVasiliy Kozyrev col[nz] = jcol; 194d305a81bSVasiliy Kozyrev val[nz++] = *bv++; 195d305a81bSVasiliy Kozyrev } 19651d8ba46SPierre Jolivet jB = j; 197d305a81bSVasiliy Kozyrev 198d305a81bSVasiliy Kozyrev /* A part */ 199d305a81bSVasiliy Kozyrev for (j = 0; j < countA; j++) { 200d305a81bSVasiliy Kozyrev col[nz] = rstart + ajj[j]; 201d305a81bSVasiliy Kozyrev val[nz++] = *av++; 202d305a81bSVasiliy Kozyrev } 203d305a81bSVasiliy Kozyrev 204d305a81bSVasiliy Kozyrev /* B part, larger col index */ 205d305a81bSVasiliy Kozyrev for (j = jB; j < countB; j++) { 206d305a81bSVasiliy Kozyrev col[nz] = garray[bjj[j]]; 207d305a81bSVasiliy Kozyrev val[nz++] = *bv++; 208d305a81bSVasiliy Kozyrev } 209d305a81bSVasiliy Kozyrev } 210d305a81bSVasiliy Kozyrev row[m] = nz; 2113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 212d305a81bSVasiliy Kozyrev } 213d305a81bSVasiliy Kozyrev 21466976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 215d71ae5a4SJacob Faibussowitsch { 21651d8ba46SPierre Jolivet const PetscInt *ai, *aj, *bi, *bj, *garray, bs = A->rmap->bs, bs2 = bs * bs, m = A->rmap->n / bs, *ajj, *bjj; 21751d8ba46SPierre Jolivet PetscInt rstart, nz, i, j, countA, countB; 21851d8ba46SPierre Jolivet PetscInt *row, *col; 21951d8ba46SPierre Jolivet const PetscScalar *av, *bv; 22051d8ba46SPierre Jolivet PetscScalar *val; 22151d8ba46SPierre Jolivet Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 222f4f49eeaSPierre Jolivet Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)mat->A->data; 223f4f49eeaSPierre Jolivet Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 22451d8ba46SPierre Jolivet PetscInt colA_start, jB, jcol; 22551d8ba46SPierre Jolivet 22651d8ba46SPierre Jolivet PetscFunctionBegin; 2279371c9d4SSatish Balay ai = aa->i; 2289371c9d4SSatish Balay aj = aa->j; 2299371c9d4SSatish Balay bi = bb->i; 2309371c9d4SSatish Balay bj = bb->j; 2319371c9d4SSatish Balay rstart = A->rmap->rstart / bs; 2329371c9d4SSatish Balay av = aa->a; 2339371c9d4SSatish Balay bv = bb->a; 23451d8ba46SPierre Jolivet 23551d8ba46SPierre Jolivet garray = mat->garray; 23651d8ba46SPierre Jolivet 23751d8ba46SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 23851d8ba46SPierre Jolivet nz = aa->nz + bb->nz; 23951d8ba46SPierre Jolivet *nnz = nz; 2409566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(m + 1, &row, nz, &col, nz * bs2, &val)); 2419371c9d4SSatish Balay *r = row; 2429371c9d4SSatish Balay *c = col; 2439371c9d4SSatish Balay *v = val; 24451d8ba46SPierre Jolivet } else { 2459371c9d4SSatish Balay row = *r; 2469371c9d4SSatish Balay col = *c; 2479371c9d4SSatish Balay val = *v; 24851d8ba46SPierre Jolivet } 24951d8ba46SPierre Jolivet 25051d8ba46SPierre Jolivet nz = 0; 25151d8ba46SPierre Jolivet for (i = 0; i < m; i++) { 25251d8ba46SPierre Jolivet row[i] = nz + 1; 25351d8ba46SPierre Jolivet countA = ai[i + 1] - ai[i]; 25451d8ba46SPierre Jolivet countB = bi[i + 1] - bi[i]; 25551d8ba46SPierre Jolivet ajj = aj + ai[i]; /* ptr to the beginning of this row */ 25651d8ba46SPierre Jolivet bjj = bj + bi[i]; 25751d8ba46SPierre Jolivet 25851d8ba46SPierre Jolivet /* B part, smaller col index */ 25951d8ba46SPierre Jolivet colA_start = rstart + (countA > 0 ? ajj[0] : 0); /* the smallest global col index of A */ 26051d8ba46SPierre Jolivet jB = 0; 26151d8ba46SPierre Jolivet for (j = 0; j < countB; j++) { 26251d8ba46SPierre Jolivet jcol = garray[bjj[j]]; 26351d8ba46SPierre Jolivet if (jcol > colA_start) break; 26451d8ba46SPierre Jolivet col[nz++] = jcol + 1; 26551d8ba46SPierre Jolivet } 26651d8ba46SPierre Jolivet jB = j; 2679566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(val, bv, jB * bs2)); 26851d8ba46SPierre Jolivet val += jB * bs2; 26951d8ba46SPierre Jolivet bv += jB * bs2; 27051d8ba46SPierre Jolivet 27151d8ba46SPierre Jolivet /* A part */ 27251d8ba46SPierre Jolivet for (j = 0; j < countA; j++) col[nz++] = rstart + ajj[j] + 1; 2739566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(val, av, countA * bs2)); 27451d8ba46SPierre Jolivet val += countA * bs2; 27551d8ba46SPierre Jolivet av += countA * bs2; 27651d8ba46SPierre Jolivet 27751d8ba46SPierre Jolivet /* B part, larger col index */ 27851d8ba46SPierre Jolivet for (j = jB; j < countB; j++) col[nz++] = garray[bjj[j]] + 1; 2799566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(val, bv, (countB - jB) * bs2)); 28051d8ba46SPierre Jolivet val += (countB - jB) * bs2; 28151d8ba46SPierre Jolivet bv += (countB - jB) * bs2; 28251d8ba46SPierre Jolivet } 28351d8ba46SPierre Jolivet row[m] = nz + 1; 2843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28551d8ba46SPierre Jolivet } 28651d8ba46SPierre Jolivet 28766976f2fSJacob Faibussowitsch static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v) 288d71ae5a4SJacob Faibussowitsch { 28951d8ba46SPierre Jolivet const PetscInt *ai, *aj, *bi, *bj, *garray, bs = A->rmap->bs, bs2 = bs * bs, m = A->rmap->n / bs, *ajj, *bjj; 29051d8ba46SPierre Jolivet PetscInt rstart, nz, i, j, countA, countB; 29151d8ba46SPierre Jolivet PetscInt *row, *col; 29251d8ba46SPierre Jolivet const PetscScalar *av, *bv; 29351d8ba46SPierre Jolivet PetscScalar *val; 29451d8ba46SPierre Jolivet Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 295f4f49eeaSPierre Jolivet Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data; 296f4f49eeaSPierre Jolivet Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 29751d8ba46SPierre Jolivet 29851d8ba46SPierre Jolivet PetscFunctionBegin; 2999371c9d4SSatish Balay ai = aa->i; 3009371c9d4SSatish Balay aj = aa->j; 3019371c9d4SSatish Balay bi = bb->i; 3029371c9d4SSatish Balay bj = bb->j; 3039371c9d4SSatish Balay rstart = A->rmap->rstart / bs; 3049371c9d4SSatish Balay av = aa->a; 3059371c9d4SSatish Balay bv = bb->a; 30651d8ba46SPierre Jolivet 30751d8ba46SPierre Jolivet garray = mat->garray; 30851d8ba46SPierre Jolivet 30951d8ba46SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) { 31051d8ba46SPierre Jolivet nz = aa->nz + bb->nz; 31151d8ba46SPierre Jolivet *nnz = nz; 3129566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(m + 1, &row, nz, &col, nz * bs2, &val)); 3139371c9d4SSatish Balay *r = row; 3149371c9d4SSatish Balay *c = col; 3159371c9d4SSatish Balay *v = val; 31651d8ba46SPierre Jolivet } else { 3179371c9d4SSatish Balay row = *r; 3189371c9d4SSatish Balay col = *c; 3199371c9d4SSatish Balay val = *v; 32051d8ba46SPierre Jolivet } 32151d8ba46SPierre Jolivet 32251d8ba46SPierre Jolivet nz = 0; 32351d8ba46SPierre Jolivet for (i = 0; i < m; i++) { 32451d8ba46SPierre Jolivet row[i] = nz + 1; 32551d8ba46SPierre Jolivet countA = ai[i + 1] - ai[i]; 32651d8ba46SPierre Jolivet countB = bi[i + 1] - bi[i]; 32751d8ba46SPierre Jolivet ajj = aj + ai[i]; /* ptr to the beginning of this row */ 32851d8ba46SPierre Jolivet bjj = bj + bi[i]; 32951d8ba46SPierre Jolivet 33051d8ba46SPierre Jolivet /* A part */ 33151d8ba46SPierre Jolivet for (j = 0; j < countA; j++) col[nz++] = rstart + ajj[j] + 1; 3329566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(val, av, countA * bs2)); 33351d8ba46SPierre Jolivet val += countA * bs2; 33451d8ba46SPierre Jolivet av += countA * bs2; 33551d8ba46SPierre Jolivet 33651d8ba46SPierre Jolivet /* B part, larger col index */ 33751d8ba46SPierre Jolivet for (j = 0; j < countB; j++) col[nz++] = garray[bjj[j]] + 1; 3389566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(val, bv, countB * bs2)); 33951d8ba46SPierre Jolivet val += countB * bs2; 34051d8ba46SPierre Jolivet bv += countB * bs2; 34151d8ba46SPierre Jolivet } 34251d8ba46SPierre Jolivet row[m] = nz + 1; 3433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34451d8ba46SPierre Jolivet } 34551d8ba46SPierre Jolivet 346d305a81bSVasiliy Kozyrev /* 347d305a81bSVasiliy Kozyrev * Free memory for Mat_MKL_CPARDISO structure and pointers to objects. 348d305a81bSVasiliy Kozyrev */ 34966976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A) 350d71ae5a4SJacob Faibussowitsch { 351660873c6SBarry Smith Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data; 352ae02b5cfSPierre Jolivet MPI_Comm comm; 353d305a81bSVasiliy Kozyrev 354d305a81bSVasiliy Kozyrev PetscFunctionBegin; 355d305a81bSVasiliy Kozyrev /* Terminate instance, deallocate memories */ 356d305a81bSVasiliy Kozyrev if (mat_mkl_cpardiso->CleanUp) { 357d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 358d305a81bSVasiliy Kozyrev 359aaaa354bSBarry Smith PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, NULL, NULL, NULL, mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, 360aaaa354bSBarry Smith mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, NULL, NULL, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err)); 361d305a81bSVasiliy Kozyrev } 36248a46eb9SPierre Jolivet if (mat_mkl_cpardiso->ConvertToTriples != MatCopy_seqaij_seqaij_MKL_CPARDISO) PetscCall(PetscFree3(mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja, mat_mkl_cpardiso->a)); 363ae02b5cfSPierre Jolivet comm = MPI_Comm_f2c(mat_mkl_cpardiso->comm_mkl_cpardiso); 3649566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&comm)); 3659566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 366d305a81bSVasiliy Kozyrev 367d305a81bSVasiliy Kozyrev /* clear composed functions */ 3689566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 3699566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMkl_CPardisoSetCntl_C", NULL)); 3703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 371d305a81bSVasiliy Kozyrev } 372d305a81bSVasiliy Kozyrev 373d305a81bSVasiliy Kozyrev /* 374d305a81bSVasiliy Kozyrev * Computes Ax = b 375d305a81bSVasiliy Kozyrev */ 37666976f2fSJacob Faibussowitsch static PetscErrorCode MatSolve_MKL_CPARDISO(Mat A, Vec b, Vec x) 377d71ae5a4SJacob Faibussowitsch { 378bd5dbebeSNils Friess Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data; 379d9ca1df4SBarry Smith PetscScalar *xarray; 380d9ca1df4SBarry Smith const PetscScalar *barray; 381d305a81bSVasiliy Kozyrev 382d305a81bSVasiliy Kozyrev PetscFunctionBegin; 383d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->nrhs = 1; 3849566063dSJacob Faibussowitsch PetscCall(VecGetArray(x, &xarray)); 3859566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(b, &barray)); 386d305a81bSVasiliy Kozyrev 387d305a81bSVasiliy Kozyrev /* solve phase */ 388d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 389aaaa354bSBarry Smith PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja, 390aaaa354bSBarry Smith mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err)); 3911d27aa22SBarry Smith PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\". Please check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 392d305a81bSVasiliy Kozyrev 3939566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(x, &xarray)); 3949566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(b, &barray)); 395d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 3963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 397d305a81bSVasiliy Kozyrev } 398d305a81bSVasiliy Kozyrev 399bd5dbebeSNils Friess static PetscErrorCode MatForwardSolve_MKL_CPARDISO(Mat A, Vec b, Vec x) 400bd5dbebeSNils Friess { 401bd5dbebeSNils Friess Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data; 402bd5dbebeSNils Friess PetscScalar *xarray; 403bd5dbebeSNils Friess const PetscScalar *barray; 404bd5dbebeSNils Friess 405bd5dbebeSNils Friess PetscFunctionBegin; 406bd5dbebeSNils Friess mat_mkl_cpardiso->nrhs = 1; 407bd5dbebeSNils Friess PetscCall(VecGetArray(x, &xarray)); 408bd5dbebeSNils Friess PetscCall(VecGetArrayRead(b, &barray)); 409bd5dbebeSNils Friess 410bd5dbebeSNils Friess /* solve phase */ 411bd5dbebeSNils Friess mat_mkl_cpardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION; 412aaaa354bSBarry Smith PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja, 413aaaa354bSBarry Smith mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err)); 414bd5dbebeSNils Friess PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\". Please check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 415bd5dbebeSNils Friess 416bd5dbebeSNils Friess PetscCall(VecRestoreArray(x, &xarray)); 417bd5dbebeSNils Friess PetscCall(VecRestoreArrayRead(b, &barray)); 418bd5dbebeSNils Friess mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 419bd5dbebeSNils Friess PetscFunctionReturn(PETSC_SUCCESS); 420bd5dbebeSNils Friess } 421bd5dbebeSNils Friess 422bd5dbebeSNils Friess static PetscErrorCode MatBackwardSolve_MKL_CPARDISO(Mat A, Vec b, Vec x) 423bd5dbebeSNils Friess { 424bd5dbebeSNils Friess Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data; 425bd5dbebeSNils Friess PetscScalar *xarray; 426bd5dbebeSNils Friess const PetscScalar *barray; 427bd5dbebeSNils Friess 428bd5dbebeSNils Friess PetscFunctionBegin; 429bd5dbebeSNils Friess mat_mkl_cpardiso->nrhs = 1; 430bd5dbebeSNils Friess PetscCall(VecGetArray(x, &xarray)); 431bd5dbebeSNils Friess PetscCall(VecGetArrayRead(b, &barray)); 432bd5dbebeSNils Friess 433bd5dbebeSNils Friess /* solve phase */ 434bd5dbebeSNils Friess mat_mkl_cpardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION; 435aaaa354bSBarry Smith PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja, 436aaaa354bSBarry Smith mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err)); 437bd5dbebeSNils Friess PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\". Please check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 438bd5dbebeSNils Friess 439bd5dbebeSNils Friess PetscCall(VecRestoreArray(x, &xarray)); 440bd5dbebeSNils Friess PetscCall(VecRestoreArrayRead(b, &barray)); 441bd5dbebeSNils Friess mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 442bd5dbebeSNils Friess PetscFunctionReturn(PETSC_SUCCESS); 443bd5dbebeSNils Friess } 444bd5dbebeSNils Friess 44566976f2fSJacob Faibussowitsch static PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A, Vec b, Vec x) 446d71ae5a4SJacob Faibussowitsch { 447660873c6SBarry Smith Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data; 448d305a81bSVasiliy Kozyrev 449d305a81bSVasiliy Kozyrev PetscFunctionBegin; 450*fc2fb351SPierre Jolivet mat_mkl_cpardiso->iparm[12 - 1] = PetscDefined(USE_COMPLEX) ? 1 : 2; 4519566063dSJacob Faibussowitsch PetscCall(MatSolve_MKL_CPARDISO(A, b, x)); 452d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[12 - 1] = 0; 4533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 454d305a81bSVasiliy Kozyrev } 455d305a81bSVasiliy Kozyrev 45666976f2fSJacob Faibussowitsch static PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A, Mat B, Mat X) 457d71ae5a4SJacob Faibussowitsch { 458bd5dbebeSNils Friess Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data; 4598572280aSBarry Smith PetscScalar *xarray; 460680d2628SBarry Smith const PetscScalar *barray; 461d305a81bSVasiliy Kozyrev 462d305a81bSVasiliy Kozyrev PetscFunctionBegin; 4639566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, NULL, (PetscInt *)&mat_mkl_cpardiso->nrhs)); 464d305a81bSVasiliy Kozyrev 465d305a81bSVasiliy Kozyrev if (mat_mkl_cpardiso->nrhs > 0) { 4669566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &barray)); 4679566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &xarray)); 468d305a81bSVasiliy Kozyrev 46908401ef6SPierre Jolivet PetscCheck(barray != xarray, PETSC_COMM_SELF, PETSC_ERR_SUP, "B and X cannot share the same memory location"); 470f952d8e8SHong Zhang 471d305a81bSVasiliy Kozyrev /* solve phase */ 472d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 473aaaa354bSBarry Smith PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja, 474aaaa354bSBarry Smith mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err)); 4751d27aa22SBarry Smith PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\". Please check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 4769566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &barray)); 4779566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &xarray)); 478d305a81bSVasiliy Kozyrev } 479d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 4803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 481d305a81bSVasiliy Kozyrev } 482d305a81bSVasiliy Kozyrev 483d305a81bSVasiliy Kozyrev /* 484d305a81bSVasiliy Kozyrev * LU Decomposition 485d305a81bSVasiliy Kozyrev */ 48666976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F, Mat A, const MatFactorInfo *info) 487d71ae5a4SJacob Faibussowitsch { 488bd5dbebeSNils Friess Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data; 489d305a81bSVasiliy Kozyrev 490d305a81bSVasiliy Kozyrev PetscFunctionBegin; 491d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 4929566063dSJacob Faibussowitsch PetscCall((*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a)); 493d305a81bSVasiliy Kozyrev 494d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION; 495aaaa354bSBarry Smith PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja, 496aaaa354bSBarry Smith mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, NULL, NULL, &mat_mkl_cpardiso->comm_mkl_cpardiso, &mat_mkl_cpardiso->err)); 4971d27aa22SBarry Smith PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\". Please check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 498d305a81bSVasiliy Kozyrev 499d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN; 500d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 5013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 502d305a81bSVasiliy Kozyrev } 503d305a81bSVasiliy Kozyrev 504d305a81bSVasiliy Kozyrev /* Sets mkl_cpardiso options from the options database */ 50566976f2fSJacob Faibussowitsch static PetscErrorCode MatSetFromOptions_MKL_CPARDISO(Mat F, Mat A) 506d71ae5a4SJacob Faibussowitsch { 507660873c6SBarry Smith Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data; 5084352ede6SMatthew G. Knepley PetscInt icntl, threads; 509d305a81bSVasiliy Kozyrev PetscBool flg; 510d305a81bSVasiliy Kozyrev 511d305a81bSVasiliy Kozyrev PetscFunctionBegin; 5121d27aa22SBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MKL Cluster PARDISO Options", "Mat"); 5131d27aa22SBarry Smith PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_65", "Suggested number of threads to use within MKL Cluster PARDISO", "None", threads, &threads, &flg)); 5144352ede6SMatthew G. Knepley if (flg) mkl_set_num_threads((int)threads); 515d305a81bSVasiliy Kozyrev 5169566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_66", "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time", "None", mat_mkl_cpardiso->maxfct, &icntl, &flg)); 517d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->maxfct = icntl; 518d305a81bSVasiliy Kozyrev 5199566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_67", "Indicates the actual matrix for the solution phase", "None", mat_mkl_cpardiso->mnum, &icntl, &flg)); 520d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->mnum = icntl; 521d305a81bSVasiliy Kozyrev 5229566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_68", "Message level information", "None", mat_mkl_cpardiso->msglvl, &icntl, &flg)); 523d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->msglvl = icntl; 524d305a81bSVasiliy Kozyrev 5259566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_69", "Defines the matrix type", "None", mat_mkl_cpardiso->mtype, &icntl, &flg)); 526560a203cSprj- if (flg) mat_mkl_cpardiso->mtype = icntl; 5279566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_1", "Use default values", "None", mat_mkl_cpardiso->iparm[0], &icntl, &flg)); 528d305a81bSVasiliy Kozyrev 529d305a81bSVasiliy Kozyrev if (flg && icntl != 0) { 5309566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_2", "Fill-in reducing ordering for the input matrix", "None", mat_mkl_cpardiso->iparm[1], &icntl, &flg)); 531d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[1] = icntl; 532d305a81bSVasiliy Kozyrev 5339566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_4", "Preconditioned CGS/CG", "None", mat_mkl_cpardiso->iparm[3], &icntl, &flg)); 534d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[3] = icntl; 535d305a81bSVasiliy Kozyrev 5369566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_5", "User permutation", "None", mat_mkl_cpardiso->iparm[4], &icntl, &flg)); 537d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[4] = icntl; 538d305a81bSVasiliy Kozyrev 5399566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_6", "Write solution on x", "None", mat_mkl_cpardiso->iparm[5], &icntl, &flg)); 540d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[5] = icntl; 541d305a81bSVasiliy Kozyrev 5429566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_8", "Iterative refinement step", "None", mat_mkl_cpardiso->iparm[7], &icntl, &flg)); 543d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[7] = icntl; 544d305a81bSVasiliy Kozyrev 5459566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_10", "Pivoting perturbation", "None", mat_mkl_cpardiso->iparm[9], &icntl, &flg)); 546d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[9] = icntl; 547d305a81bSVasiliy Kozyrev 5489566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_11", "Scaling vectors", "None", mat_mkl_cpardiso->iparm[10], &icntl, &flg)); 549d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[10] = icntl; 550d305a81bSVasiliy Kozyrev 5519566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_12", "Solve with transposed or conjugate transposed matrix A", "None", mat_mkl_cpardiso->iparm[11], &icntl, &flg)); 552d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[11] = icntl; 553d305a81bSVasiliy Kozyrev 554d0609cedSBarry Smith PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_13", "Improved accuracy using (non-) symmetric weighted matching", "None", mat_mkl_cpardiso->iparm[12], &icntl, &flg)); 555d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[12] = icntl; 556d305a81bSVasiliy Kozyrev 557d0609cedSBarry Smith PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_18", "Numbers of non-zero elements", "None", mat_mkl_cpardiso->iparm[17], &icntl, &flg)); 558d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[17] = icntl; 559d305a81bSVasiliy Kozyrev 5609566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_19", "Report number of floating point operations", "None", mat_mkl_cpardiso->iparm[18], &icntl, &flg)); 561d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[18] = icntl; 562d305a81bSVasiliy Kozyrev 5639566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_21", "Pivoting for symmetric indefinite matrices", "None", mat_mkl_cpardiso->iparm[20], &icntl, &flg)); 564d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[20] = icntl; 565d305a81bSVasiliy Kozyrev 5669566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_24", "Parallel factorization control", "None", mat_mkl_cpardiso->iparm[23], &icntl, &flg)); 567d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[23] = icntl; 568d305a81bSVasiliy Kozyrev 5699566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_25", "Parallel forward/backward solve control", "None", mat_mkl_cpardiso->iparm[24], &icntl, &flg)); 570d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[24] = icntl; 571d305a81bSVasiliy Kozyrev 5729566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_27", "Matrix checker", "None", mat_mkl_cpardiso->iparm[26], &icntl, &flg)); 573d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[26] = icntl; 574d305a81bSVasiliy Kozyrev 5759566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_31", "Partial solve and computing selected components of the solution vectors", "None", mat_mkl_cpardiso->iparm[30], &icntl, &flg)); 576d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[30] = icntl; 577d305a81bSVasiliy Kozyrev 5789566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_34", "Optimal number of threads for conditional numerical reproducibility (CNR) mode", "None", mat_mkl_cpardiso->iparm[33], &icntl, &flg)); 579d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[33] = icntl; 580d305a81bSVasiliy Kozyrev 5811d27aa22SBarry Smith PetscCall(PetscOptionsInt("-mat_mkl_cpardiso_60", "Intel MKL Cluster PARDISO mode", "None", mat_mkl_cpardiso->iparm[59], &icntl, &flg)); 582d305a81bSVasiliy Kozyrev if (flg) mat_mkl_cpardiso->iparm[59] = icntl; 583d305a81bSVasiliy Kozyrev } 584d305a81bSVasiliy Kozyrev 585d0609cedSBarry Smith PetscOptionsEnd(); 5863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 587d305a81bSVasiliy Kozyrev } 588d305a81bSVasiliy Kozyrev 58966976f2fSJacob Faibussowitsch static PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso) 590d71ae5a4SJacob Faibussowitsch { 59151d8ba46SPierre Jolivet PetscInt bs; 59251d8ba46SPierre Jolivet PetscBool match; 5932b26979fSBarry Smith PetscMPIInt size; 594ae02b5cfSPierre Jolivet MPI_Comm comm; 595d305a81bSVasiliy Kozyrev 596d305a81bSVasiliy Kozyrev PetscFunctionBegin; 5979566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)A), &comm)); 5989566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 599ae02b5cfSPierre Jolivet mat_mkl_cpardiso->comm_mkl_cpardiso = MPI_Comm_c2f(comm); 600d305a81bSVasiliy Kozyrev 601d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->CleanUp = PETSC_FALSE; 602d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->maxfct = 1; 603d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->mnum = 1; 604d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->n = A->rmap->N; 60551d8ba46SPierre Jolivet if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36]; 606d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->msglvl = 0; 607d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->nrhs = 1; 608d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->err = 0; 609d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->phase = -1; 610*fc2fb351SPierre Jolivet mat_mkl_cpardiso->mtype = PetscDefined(USE_COMPLEX) ? 13 : 11; 611d305a81bSVasiliy Kozyrev 612*fc2fb351SPierre Jolivet mat_mkl_cpardiso->iparm[27] = PetscDefined(USE_REAL_SINGLE) ? 1 : 0; 613d305a81bSVasiliy Kozyrev 614da81f932SPierre Jolivet mat_mkl_cpardiso->iparm[0] = 1; /* Solver default parameters overridden with provided by iparm */ 615d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[1] = 2; /* Use METIS for fill-in reordering */ 616d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[5] = 0; /* Write solution into x */ 617d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[7] = 2; /* Max number of iterative refinement steps */ 618d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[9] = 13; /* Perturb the pivot elements with 1E-13 */ 619d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 620d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 621d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 622d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 623d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */ 624d305a81bSVasiliy Kozyrev 625d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[39] = 0; 6262b26979fSBarry Smith if (size > 1) { 627d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[39] = 2; 628d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[40] = A->rmap->rstart; 629d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[41] = A->rmap->rend - 1; 630d305a81bSVasiliy Kozyrev } 6319566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &match, MATMPIBAIJ, MATMPISBAIJ, "")); 63251d8ba46SPierre Jolivet if (match) { 6339566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 63451d8ba46SPierre Jolivet mat_mkl_cpardiso->iparm[36] = bs; 63551d8ba46SPierre Jolivet mat_mkl_cpardiso->iparm[40] /= bs; 63651d8ba46SPierre Jolivet mat_mkl_cpardiso->iparm[41] /= bs; 63751d8ba46SPierre Jolivet mat_mkl_cpardiso->iparm[40]++; 63851d8ba46SPierre Jolivet mat_mkl_cpardiso->iparm[41]++; 63951d8ba46SPierre Jolivet mat_mkl_cpardiso->iparm[34] = 0; /* Fortran style */ 64051d8ba46SPierre Jolivet } else { 64151d8ba46SPierre Jolivet mat_mkl_cpardiso->iparm[34] = 1; /* C style */ 64251d8ba46SPierre Jolivet } 64351d8ba46SPierre Jolivet 644d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->perm = 0; 6453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 646d305a81bSVasiliy Kozyrev } 647d305a81bSVasiliy Kozyrev 648d305a81bSVasiliy Kozyrev /* 649d305a81bSVasiliy Kozyrev * Symbolic decomposition. Mkl_Pardiso analysis phase. 650d305a81bSVasiliy Kozyrev */ 65166976f2fSJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 652d71ae5a4SJacob Faibussowitsch { 653660873c6SBarry Smith Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data; 654d305a81bSVasiliy Kozyrev 655d305a81bSVasiliy Kozyrev PetscFunctionBegin; 656d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 657d305a81bSVasiliy Kozyrev 658d305a81bSVasiliy Kozyrev /* Set MKL_CPARDISO options from the options database */ 65926cc229bSBarry Smith PetscCall(MatSetFromOptions_MKL_CPARDISO(F, A)); 6609566063dSJacob Faibussowitsch PetscCall((*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_INITIAL_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a)); 661d305a81bSVasiliy Kozyrev 662d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->n = A->rmap->N; 66351d8ba46SPierre Jolivet if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36]; 664d305a81bSVasiliy Kozyrev 665d305a81bSVasiliy Kozyrev /* analysis phase */ 666d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->phase = JOB_ANALYSIS; 667d305a81bSVasiliy Kozyrev 668aaaa354bSBarry Smith PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja, 669aaaa354bSBarry Smith mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, NULL, NULL, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err)); 6701d27aa22SBarry Smith PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\".Check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 671d305a81bSVasiliy Kozyrev 672d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 673d305a81bSVasiliy Kozyrev F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO; 674d305a81bSVasiliy Kozyrev F->ops->solve = MatSolve_MKL_CPARDISO; 675bd5dbebeSNils Friess F->ops->forwardsolve = MatForwardSolve_MKL_CPARDISO; 676bd5dbebeSNils Friess F->ops->backwardsolve = MatBackwardSolve_MKL_CPARDISO; 677d305a81bSVasiliy Kozyrev F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO; 678d305a81bSVasiliy Kozyrev F->ops->matsolve = MatMatSolve_MKL_CPARDISO; 6793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 680d305a81bSVasiliy Kozyrev } 681d305a81bSVasiliy Kozyrev 68266976f2fSJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_CPARDISO(Mat F, Mat A, IS perm, const MatFactorInfo *info) 683d71ae5a4SJacob Faibussowitsch { 68451d8ba46SPierre Jolivet Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data; 68551d8ba46SPierre Jolivet 68651d8ba46SPierre Jolivet PetscFunctionBegin; 68751d8ba46SPierre Jolivet mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 68851d8ba46SPierre Jolivet 68951d8ba46SPierre Jolivet /* Set MKL_CPARDISO options from the options database */ 69026cc229bSBarry Smith PetscCall(MatSetFromOptions_MKL_CPARDISO(F, A)); 6919566063dSJacob Faibussowitsch PetscCall((*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_INITIAL_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a)); 69251d8ba46SPierre Jolivet 69351d8ba46SPierre Jolivet mat_mkl_cpardiso->n = A->rmap->N; 69451d8ba46SPierre Jolivet if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36]; 6958b933ea1SPierre Jolivet PetscCheck(!PetscDefined(USE_COMPLEX), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for PARDISO CHOLESKY with complex scalars! Use MAT_FACTOR_LU instead"); 696b94d7dedSBarry Smith if (A->spd == PETSC_BOOL3_TRUE) mat_mkl_cpardiso->mtype = 2; 69751d8ba46SPierre Jolivet else mat_mkl_cpardiso->mtype = -2; 69851d8ba46SPierre Jolivet 69951d8ba46SPierre Jolivet /* analysis phase */ 70051d8ba46SPierre Jolivet mat_mkl_cpardiso->phase = JOB_ANALYSIS; 70151d8ba46SPierre Jolivet 702aaaa354bSBarry Smith PetscCallCluster(cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja, 703aaaa354bSBarry Smith mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, NULL, NULL, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err)); 7041d27aa22SBarry Smith PetscCheck(mat_mkl_cpardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL Cluster PARDISO: err=%d, msg = \"%s\".Check manual", mat_mkl_cpardiso->err, Err_MSG_CPardiso(mat_mkl_cpardiso->err)); 70551d8ba46SPierre Jolivet 70651d8ba46SPierre Jolivet mat_mkl_cpardiso->CleanUp = PETSC_TRUE; 70751d8ba46SPierre Jolivet F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_CPARDISO; 70851d8ba46SPierre Jolivet F->ops->solve = MatSolve_MKL_CPARDISO; 70951d8ba46SPierre Jolivet F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO; 71051d8ba46SPierre Jolivet F->ops->matsolve = MatMatSolve_MKL_CPARDISO; 711bd5dbebeSNils Friess if (A->spd == PETSC_BOOL3_TRUE) { 712bd5dbebeSNils Friess F->ops->forwardsolve = MatForwardSolve_MKL_CPARDISO; 713bd5dbebeSNils Friess F->ops->backwardsolve = MatBackwardSolve_MKL_CPARDISO; 714bd5dbebeSNils Friess } 7153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 71651d8ba46SPierre Jolivet } 71751d8ba46SPierre Jolivet 71866976f2fSJacob Faibussowitsch static PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer) 719d71ae5a4SJacob Faibussowitsch { 7209f196a02SMartin Diehl PetscBool isascii; 721d305a81bSVasiliy Kozyrev PetscViewerFormat format; 722660873c6SBarry Smith Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data; 723d305a81bSVasiliy Kozyrev PetscInt i; 724d305a81bSVasiliy Kozyrev 725d305a81bSVasiliy Kozyrev PetscFunctionBegin; 726d305a81bSVasiliy Kozyrev /* check if matrix is mkl_cpardiso type */ 7273ba16761SJacob Faibussowitsch if (A->ops->solve != MatSolve_MKL_CPARDISO) PetscFunctionReturn(PETSC_SUCCESS); 728d305a81bSVasiliy Kozyrev 7299f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 7309f196a02SMartin Diehl if (isascii) { 7319566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 732d305a81bSVasiliy Kozyrev if (format == PETSC_VIEWER_ASCII_INFO) { 7331d27aa22SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO run parameters:\n")); 7341d27aa22SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO phase: %d \n", mat_mkl_cpardiso->phase)); 7351d27aa22SBarry Smith for (i = 1; i <= 64; i++) PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO iparm[%d]: %d \n", i, mat_mkl_cpardiso->iparm[i - 1])); 7361d27aa22SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct)); 7371d27aa22SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO mnum: %d \n", mat_mkl_cpardiso->mnum)); 7381d27aa22SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO mtype: %d \n", mat_mkl_cpardiso->mtype)); 7391d27aa22SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO n: %d \n", mat_mkl_cpardiso->n)); 7401d27aa22SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs)); 7411d27aa22SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "MKL Cluster PARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl)); 742d305a81bSVasiliy Kozyrev } 743d305a81bSVasiliy Kozyrev } 7443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 745d305a81bSVasiliy Kozyrev } 746d305a81bSVasiliy Kozyrev 74766976f2fSJacob Faibussowitsch static PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info) 748d71ae5a4SJacob Faibussowitsch { 749660873c6SBarry Smith Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data; 750d305a81bSVasiliy Kozyrev 751d305a81bSVasiliy Kozyrev PetscFunctionBegin; 752d305a81bSVasiliy Kozyrev info->block_size = 1.0; 753d305a81bSVasiliy Kozyrev info->nz_allocated = mat_mkl_cpardiso->nz + 0.0; 754d305a81bSVasiliy Kozyrev info->nz_unneeded = 0.0; 755d305a81bSVasiliy Kozyrev info->assemblies = 0.0; 756d305a81bSVasiliy Kozyrev info->mallocs = 0.0; 757d305a81bSVasiliy Kozyrev info->memory = 0.0; 758d305a81bSVasiliy Kozyrev info->fill_ratio_given = 0; 759d305a81bSVasiliy Kozyrev info->fill_ratio_needed = 0; 760d305a81bSVasiliy Kozyrev info->factor_mallocs = 0; 7613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 762d305a81bSVasiliy Kozyrev } 763d305a81bSVasiliy Kozyrev 76466976f2fSJacob Faibussowitsch static PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F, PetscInt icntl, PetscInt ival) 765d71ae5a4SJacob Faibussowitsch { 766660873c6SBarry Smith Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data; 7672b26979fSBarry Smith 768d305a81bSVasiliy Kozyrev PetscFunctionBegin; 769d305a81bSVasiliy Kozyrev if (icntl <= 64) { 770d305a81bSVasiliy Kozyrev mat_mkl_cpardiso->iparm[icntl - 1] = ival; 771d305a81bSVasiliy Kozyrev } else { 772660873c6SBarry Smith if (icntl == 65) mkl_set_num_threads((int)ival); 773660873c6SBarry Smith else if (icntl == 66) mat_mkl_cpardiso->maxfct = ival; 774660873c6SBarry Smith else if (icntl == 67) mat_mkl_cpardiso->mnum = ival; 775660873c6SBarry Smith else if (icntl == 68) mat_mkl_cpardiso->msglvl = ival; 776560a203cSprj- else if (icntl == 69) mat_mkl_cpardiso->mtype = ival; 777d305a81bSVasiliy Kozyrev } 7783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 779d305a81bSVasiliy Kozyrev } 780d305a81bSVasiliy Kozyrev 781d305a81bSVasiliy Kozyrev /*@ 7821d27aa22SBarry Smith MatMkl_CPardisoSetCntl - Set MKL Cluster PARDISO parameters 7831d27aa22SBarry Smith <https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2023-2/onemkl-pardiso-parallel-direct-sparse-solver-iface.html> 784d305a81bSVasiliy Kozyrev 785c3339decSBarry Smith Logically Collective 786d305a81bSVasiliy Kozyrev 787d305a81bSVasiliy Kozyrev Input Parameters: 78811a5261eSBarry Smith + F - the factored matrix obtained by calling `MatGetFactor()` 7891d27aa22SBarry Smith . icntl - index of MKL Cluster PARDISO parameter 7901d27aa22SBarry Smith - ival - value of MKL Cluster PARDISO parameter 791d305a81bSVasiliy Kozyrev 7923c7db156SBarry Smith Options Database Key: 793147403d9SBarry Smith . -mat_mkl_cpardiso_<icntl> <ival> - set the option numbered icntl to ival 794d305a81bSVasiliy Kozyrev 795fe59aa6dSJacob Faibussowitsch Level: intermediate 7967ca43916SBarry Smith 79711a5261eSBarry Smith Note: 79811a5261eSBarry Smith This routine cannot be used if you are solving the linear system with `TS`, `SNES`, or `KSP`, only if you directly call `MatGetFactor()` so use the options 7992ef1f0ffSBarry Smith database approach when working with `TS`, `SNES`, or `KSP`. See `MATSOLVERMKL_CPARDISO` for the options 800d305a81bSVasiliy Kozyrev 8011cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MATMPIAIJ`, `MATSOLVERMKL_CPARDISO` 802d305a81bSVasiliy Kozyrev @*/ 803d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMkl_CPardisoSetCntl(Mat F, PetscInt icntl, PetscInt ival) 804d71ae5a4SJacob Faibussowitsch { 805d305a81bSVasiliy Kozyrev PetscFunctionBegin; 806cac4c232SBarry Smith PetscTryMethod(F, "MatMkl_CPardisoSetCntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 8073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 808d305a81bSVasiliy Kozyrev } 809d305a81bSVasiliy Kozyrev 8109b4359b8SBarry Smith /*MC 8111d27aa22SBarry Smith MATSOLVERMKL_CPARDISO - A matrix type providing direct solvers (LU) for parallel matrices via the external package MKL Cluster PARDISO 8121d27aa22SBarry Smith <https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2023-2/onemkl-pardiso-parallel-direct-sparse-solver-iface.html> 8139b4359b8SBarry Smith 81411a5261eSBarry Smith Works with `MATMPIAIJ` matrices 8159b4359b8SBarry Smith 8162ef1f0ffSBarry Smith Use `-pc_type lu` `-pc_factor_mat_solver_type mkl_cpardiso` to use this direct solver 8179b4359b8SBarry Smith 8189b4359b8SBarry Smith Options Database Keys: 8191d27aa22SBarry Smith + -mat_mkl_cpardiso_65 - Suggested number of threads to use within MKL Cluster PARDISO 8209b4359b8SBarry Smith . -mat_mkl_cpardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time 8219b4359b8SBarry Smith . -mat_mkl_cpardiso_67 - Indicates the actual matrix for the solution phase 8229b4359b8SBarry Smith . -mat_mkl_cpardiso_68 - Message level information, use 1 to get detailed information on the solver options 8239b4359b8SBarry Smith . -mat_mkl_cpardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type 8249b4359b8SBarry Smith . -mat_mkl_cpardiso_1 - Use default values 8259b4359b8SBarry Smith . -mat_mkl_cpardiso_2 - Fill-in reducing ordering for the input matrix 8269b4359b8SBarry Smith . -mat_mkl_cpardiso_4 - Preconditioned CGS/CG 8279b4359b8SBarry Smith . -mat_mkl_cpardiso_5 - User permutation 8289b4359b8SBarry Smith . -mat_mkl_cpardiso_6 - Write solution on x 8299b4359b8SBarry Smith . -mat_mkl_cpardiso_8 - Iterative refinement step 8309b4359b8SBarry Smith . -mat_mkl_cpardiso_10 - Pivoting perturbation 8319b4359b8SBarry Smith . -mat_mkl_cpardiso_11 - Scaling vectors 8329b4359b8SBarry Smith . -mat_mkl_cpardiso_12 - Solve with transposed or conjugate transposed matrix A 8339b4359b8SBarry Smith . -mat_mkl_cpardiso_13 - Improved accuracy using (non-) symmetric weighted matching 8349b4359b8SBarry Smith . -mat_mkl_cpardiso_18 - Numbers of non-zero elements 8359b4359b8SBarry Smith . -mat_mkl_cpardiso_19 - Report number of floating point operations 8369b4359b8SBarry Smith . -mat_mkl_cpardiso_21 - Pivoting for symmetric indefinite matrices 8379b4359b8SBarry Smith . -mat_mkl_cpardiso_24 - Parallel factorization control 8389b4359b8SBarry Smith . -mat_mkl_cpardiso_25 - Parallel forward/backward solve control 8399b4359b8SBarry Smith . -mat_mkl_cpardiso_27 - Matrix checker 8409b4359b8SBarry Smith . -mat_mkl_cpardiso_31 - Partial solve and computing selected components of the solution vectors 8419b4359b8SBarry Smith . -mat_mkl_cpardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode 8421d27aa22SBarry Smith - -mat_mkl_cpardiso_60 - Intel MKL Cluster PARDISO mode 8439b4359b8SBarry Smith 8449b4359b8SBarry Smith Level: beginner 8459b4359b8SBarry Smith 8469b4359b8SBarry Smith Notes: 8472ef1f0ffSBarry Smith Use `-mat_mkl_cpardiso_68 1` to display the number of threads the solver is using. MKL does not provide a way to directly access this 8489b4359b8SBarry Smith information. 8499b4359b8SBarry Smith 8501d27aa22SBarry Smith For more information on the options check 8511d27aa22SBarry Smith <https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2023-2/onemkl-pardiso-parallel-direct-sparse-solver-iface.html> 8529b4359b8SBarry Smith 8531d27aa22SBarry Smith .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMkl_CPardisoSetCntl()`, `MatGetFactor()`, `MATSOLVERMKL_PARDISO` 8549b4359b8SBarry Smith M*/ 8559b4359b8SBarry Smith 856d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type) 857d71ae5a4SJacob Faibussowitsch { 858d305a81bSVasiliy Kozyrev PetscFunctionBegin; 859d305a81bSVasiliy Kozyrev *type = MATSOLVERMKL_CPARDISO; 8603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 861d305a81bSVasiliy Kozyrev } 862d305a81bSVasiliy Kozyrev 863d305a81bSVasiliy Kozyrev /* MatGetFactor for MPI AIJ matrices */ 864d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A, MatFactorType ftype, Mat *F) 865d71ae5a4SJacob Faibussowitsch { 866d305a81bSVasiliy Kozyrev Mat B; 867d305a81bSVasiliy Kozyrev Mat_MKL_CPARDISO *mat_mkl_cpardiso; 86851d8ba46SPierre Jolivet PetscBool isSeqAIJ, isMPIBAIJ, isMPISBAIJ; 869d305a81bSVasiliy Kozyrev 870d305a81bSVasiliy Kozyrev PetscFunctionBegin; 871d305a81bSVasiliy Kozyrev /* Create the factorization matrix */ 872d305a81bSVasiliy Kozyrev 8739566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 8749566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &isMPIBAIJ)); 8759566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISBAIJ, &isMPISBAIJ)); 8769566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 8779566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 8789566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("mkl_cpardiso", &((PetscObject)B)->type_name)); 8799566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 880d305a81bSVasiliy Kozyrev 8814dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mat_mkl_cpardiso)); 882d305a81bSVasiliy Kozyrev 883660873c6SBarry Smith if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO; 88451d8ba46SPierre Jolivet else if (isMPIBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO; 88551d8ba46SPierre Jolivet else if (isMPISBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO; 886660873c6SBarry Smith else mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO; 887d305a81bSVasiliy Kozyrev 88851d8ba46SPierre Jolivet if (ftype == MAT_FACTOR_LU) B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO; 88951d8ba46SPierre Jolivet else B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_CPARDISO; 890d305a81bSVasiliy Kozyrev B->ops->destroy = MatDestroy_MKL_CPARDISO; 891d305a81bSVasiliy Kozyrev 892d305a81bSVasiliy Kozyrev B->ops->view = MatView_MKL_CPARDISO; 893d305a81bSVasiliy Kozyrev B->ops->getinfo = MatGetInfo_MKL_CPARDISO; 894d305a81bSVasiliy Kozyrev 895d305a81bSVasiliy Kozyrev B->factortype = ftype; 896d305a81bSVasiliy Kozyrev B->assembled = PETSC_TRUE; /* required by -ksp_view */ 897d305a81bSVasiliy Kozyrev 898660873c6SBarry Smith B->data = mat_mkl_cpardiso; 899d305a81bSVasiliy Kozyrev 90000c67f3bSHong Zhang /* set solvertype */ 9019566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype)); 9029566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERMKL_CPARDISO, &B->solvertype)); 90300c67f3bSHong Zhang 9049566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mkl_cpardiso)); 9059566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMkl_CPardisoSetCntl_C", MatMkl_CPardisoSetCntl_MKL_CPARDISO)); 9069566063dSJacob Faibussowitsch PetscCall(PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso)); 907d305a81bSVasiliy Kozyrev 908d305a81bSVasiliy Kozyrev *F = B; 9093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 910d305a81bSVasiliy Kozyrev } 911d305a81bSVasiliy Kozyrev 912d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void) 913d71ae5a4SJacob Faibussowitsch { 914d305a81bSVasiliy Kozyrev PetscFunctionBegin; 9159566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMKL_CPARDISO, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_mpiaij_mkl_cpardiso)); 9169566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMKL_CPARDISO, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_mpiaij_mkl_cpardiso)); 9179566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMKL_CPARDISO, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_mpiaij_mkl_cpardiso)); 9189566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERMKL_CPARDISO, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_mpiaij_mkl_cpardiso)); 9193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 920d305a81bSVasiliy Kozyrev } 921