1c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 3c6db04a5SJed Brown #include <petscbt.h> 4c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 53b2fbd54SBarry Smith 68fa24674SBarry Smith /* 78fa24674SBarry Smith Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 8312e372aSBarry Smith 9312e372aSBarry Smith This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 108fa24674SBarry Smith */ 11ff6a9541SJacob Faibussowitsch #if 0 12d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) 13d71ae5a4SJacob Faibussowitsch { 148fa24674SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 15889a8dedSBarry Smith PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 168fa24674SBarry Smith const PetscInt *ai = a->i, *aj = a->j; 178fa24674SBarry Smith const PetscScalar *aa = a->a; 18ace3abfcSBarry Smith PetscBool *done; 19889a8dedSBarry Smith PetscReal best, past = 0, future; 203a40ed3dSBarry Smith 218fa24674SBarry Smith PetscFunctionBegin; 228fa24674SBarry Smith /* pick initial row */ 238fa24674SBarry Smith best = -1; 248fa24674SBarry Smith for (i = 0; i < n; i++) { 2575567043SBarry Smith future = 0.0; 268fa24674SBarry Smith for (j = ai[i]; j < ai[i + 1]; j++) { 27db4deed7SKarl Rupp if (aj[j] != i) future += PetscAbsScalar(aa[j]); 28db4deed7SKarl Rupp else past = PetscAbsScalar(aa[j]); 298fa24674SBarry Smith } 308fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 318fa24674SBarry Smith if (past / future > best) { 328fa24674SBarry Smith best = past / future; 338fa24674SBarry Smith current = i; 348fa24674SBarry Smith } 358fa24674SBarry Smith } 368fa24674SBarry Smith 379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &done)); 389566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(done, n)); 399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &order)); 408fa24674SBarry Smith order[0] = current; 418fa24674SBarry Smith for (i = 0; i < n - 1; i++) { 428fa24674SBarry Smith done[current] = PETSC_TRUE; 438fa24674SBarry Smith best = -1; 448fa24674SBarry Smith /* loop over all neighbors of current pivot */ 458fa24674SBarry Smith for (j = ai[current]; j < ai[current + 1]; j++) { 468fa24674SBarry Smith jj = aj[j]; 478fa24674SBarry Smith if (done[jj]) continue; 488fa24674SBarry Smith /* loop over columns of potential next row computing weights for below and above diagonal */ 498fa24674SBarry Smith past = future = 0.0; 508fa24674SBarry Smith for (k = ai[jj]; k < ai[jj + 1]; k++) { 518fa24674SBarry Smith kk = aj[k]; 528fa24674SBarry Smith if (done[kk]) past += PetscAbsScalar(aa[k]); 538fa24674SBarry Smith else if (kk != jj) future += PetscAbsScalar(aa[k]); 548fa24674SBarry Smith } 558fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 568fa24674SBarry Smith if (past / future > best) { 578fa24674SBarry Smith best = past / future; 588fa24674SBarry Smith newcurrent = jj; 598fa24674SBarry Smith } 608fa24674SBarry Smith } 618fa24674SBarry Smith if (best == -1) { /* no neighbors to select from so select best of all that remain */ 628fa24674SBarry Smith best = -1; 638fa24674SBarry Smith for (k = 0; k < n; k++) { 648fa24674SBarry Smith if (done[k]) continue; 6575567043SBarry Smith future = 0.0; 6675567043SBarry Smith past = 0.0; 678fa24674SBarry Smith for (j = ai[k]; j < ai[k + 1]; j++) { 688fa24674SBarry Smith kk = aj[j]; 698fa24674SBarry Smith if (done[kk]) past += PetscAbsScalar(aa[j]); 708fa24674SBarry Smith else if (kk != k) future += PetscAbsScalar(aa[j]); 718fa24674SBarry Smith } 728fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 738fa24674SBarry Smith if (past / future > best) { 748fa24674SBarry Smith best = past / future; 758fa24674SBarry Smith newcurrent = k; 768fa24674SBarry Smith } 778fa24674SBarry Smith } 788fa24674SBarry Smith } 7908401ef6SPierre Jolivet PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 808fa24674SBarry Smith current = newcurrent; 818fa24674SBarry Smith order[i + 1] = current; 828fa24674SBarry Smith } 839566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 848fa24674SBarry Smith *icol = *irow; 859566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)*irow)); 869566063dSJacob Faibussowitsch PetscCall(PetscFree(done)); 879566063dSJacob Faibussowitsch PetscCall(PetscFree(order)); 883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 893b2fbd54SBarry Smith } 90ff6a9541SJacob Faibussowitsch #endif 913b2fbd54SBarry Smith 92d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 93d71ae5a4SJacob Faibussowitsch { 944ac6704cSBarry Smith PetscFunctionBegin; 954ac6704cSBarry Smith *type = MATSOLVERPETSC; 963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 974ac6704cSBarry Smith } 984ac6704cSBarry Smith 99d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 100d71ae5a4SJacob Faibussowitsch { 101d0f46423SBarry Smith PetscInt n = A->rmap->n; 102b24902e0SBarry Smith 103b24902e0SBarry Smith PetscFunctionBegin; 104891bceaeSHong Zhang #if defined(PETSC_USE_COMPLEX) 10503e5aca4SStefano Zampini if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 10603e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n")); 10703e5aca4SStefano Zampini *B = NULL; 10803e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 10903e5aca4SStefano Zampini } 110891bceaeSHong Zhang #endif 11103e5aca4SStefano Zampini 1129566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 1139566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, n, n, n, n)); 114599ef60dSHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 1159566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQAIJ)); 1162205254eSKarl Rupp 11735233d99SShri Abhyankar (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 11835233d99SShri Abhyankar (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 1192205254eSKarl Rupp 1209566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 1219566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 1229566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 1239566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 124b24902e0SBarry Smith } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1259566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQSBAIJ)); 1269566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 1272205254eSKarl Rupp 12835233d99SShri Abhyankar (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 12935233d99SShri Abhyankar (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 1309566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 1319566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 132e32f2f54SBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 133d5f3da31SBarry Smith (*B)->factortype = ftype; 13400c67f3bSHong Zhang 1359566063dSJacob Faibussowitsch PetscCall(PetscFree((*B)->solvertype)); 1369566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 137f73b0415SBarry Smith (*B)->canuseordering = PETSC_TRUE; 1389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 1393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 140b24902e0SBarry Smith } 141b24902e0SBarry Smith 142ff6a9541SJacob Faibussowitsch #if 0 143ff6a9541SJacob Faibussowitsch // currently unused 144ff6a9541SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 145d71ae5a4SJacob Faibussowitsch { 146416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 147289bc588SBarry Smith IS isicol; 1485d0c19d7SBarry Smith const PetscInt *r, *ic; 1495d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 1501393bc97SHong Zhang PetscInt *bi, *bj, *ajtmp; 1511393bc97SHong Zhang PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 1529e046878SBarry Smith PetscReal f; 1534875ba38SHong Zhang PetscInt nlnk, *lnk, k, **bi_ptr; 1540298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 1551393bc97SHong Zhang PetscBT lnkbt; 156ece542f9SHong Zhang PetscBool missing; 157289bc588SBarry Smith 1583a40ed3dSBarry Smith PetscFunctionBegin; 15908401ef6SPierre Jolivet PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 1609566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 16128b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 162ece542f9SHong Zhang 1639566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1649566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 1659566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 166289bc588SBarry Smith 167289bc588SBarry Smith /* get new row pointers */ 1689f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi)); 1691393bc97SHong Zhang bi[0] = 0; 1701393bc97SHong Zhang 1711393bc97SHong Zhang /* bdiag is location of diagonal in factor */ 1729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1731393bc97SHong Zhang bdiag[0] = 0; 1741393bc97SHong Zhang 1751393bc97SHong Zhang /* linked list for storing column indices of the active row */ 1761393bc97SHong Zhang nlnk = n + 1; 1779566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 1789566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 1791393bc97SHong Zhang 1801393bc97SHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 181d3d32019SBarry Smith f = info->fill; 182aa91b3e7SRichard Tran Mills if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 1839566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1841393bc97SHong Zhang current_space = free_space; 185289bc588SBarry Smith 186289bc588SBarry Smith for (i = 0; i < n; i++) { 1871393bc97SHong Zhang /* copy previous fill into linked list */ 188289bc588SBarry Smith nzi = 0; 1891393bc97SHong Zhang nnz = ai[r[i] + 1] - ai[r[i]]; 1901393bc97SHong Zhang ajtmp = aj + ai[r[i]]; 1919566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 1921393bc97SHong Zhang nzi += nlnk; 1931393bc97SHong Zhang 1941393bc97SHong Zhang /* add pivot rows into linked list */ 1951393bc97SHong Zhang row = lnk[n]; 1961393bc97SHong Zhang while (row < i) { 197d1170560SHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 198d1170560SHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 1999566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 2001393bc97SHong Zhang nzi += nlnk; 2011393bc97SHong Zhang row = lnk[row]; 202289bc588SBarry Smith } 2031393bc97SHong Zhang bi[i + 1] = bi[i] + nzi; 2041393bc97SHong Zhang im[i] = nzi; 2051393bc97SHong Zhang 2061393bc97SHong Zhang /* mark bdiag */ 2071393bc97SHong Zhang nzbd = 0; 2081393bc97SHong Zhang nnz = nzi; 2091393bc97SHong Zhang k = lnk[n]; 2101393bc97SHong Zhang while (nnz-- && k < i) { 2111393bc97SHong Zhang nzbd++; 2121393bc97SHong Zhang k = lnk[k]; 2131393bc97SHong Zhang } 2141393bc97SHong Zhang bdiag[i] = bi[i] + nzbd; 2151393bc97SHong Zhang 2161393bc97SHong Zhang /* if free space is not available, make more free space */ 2171393bc97SHong Zhang if (current_space->local_remaining < nzi) { 218f91af8c7SBarry Smith nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 2199566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 2201393bc97SHong Zhang reallocs++; 2211393bc97SHong Zhang } 2221393bc97SHong Zhang 2231393bc97SHong Zhang /* copy data into free space, then initialize lnk */ 2249566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 2252205254eSKarl Rupp 2261393bc97SHong Zhang bi_ptr[i] = current_space->array; 2271393bc97SHong Zhang current_space->array += nzi; 2281393bc97SHong Zhang current_space->local_used += nzi; 2291393bc97SHong Zhang current_space->local_remaining -= nzi; 230289bc588SBarry Smith } 2316cf91177SBarry Smith #if defined(PETSC_USE_INFO) 232464e8d44SSatish Balay if (ai[n] != 0) { 2331393bc97SHong Zhang PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2349566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 2359566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2369566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 2379566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 23805bf559cSSatish Balay } else { 2399566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 24005bf559cSSatish Balay } 24163ba0a88SBarry Smith #endif 2422fb3ed76SBarry Smith 2439566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 2449566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 2451987afe7SBarry Smith 2461393bc97SHong Zhang /* destroy list of free space and other temporary array(s) */ 2479f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj)); 2489566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 2499566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 2509566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 251289bc588SBarry Smith 252289bc588SBarry Smith /* put together the new matrix */ 2539566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 25457508eceSPierre Jolivet b = (Mat_SeqAIJ *)B->data; 255e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 2569f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a)); 2579f0612e4SBarry Smith b->free_a = PETSC_TRUE; 2581393bc97SHong Zhang b->j = bj; 2591393bc97SHong Zhang b->i = bi; 2601393bc97SHong Zhang b->diag = bdiag; 261f4259b30SLisandro Dalcin b->ilen = NULL; 262f4259b30SLisandro Dalcin b->imax = NULL; 263416022c9SBarry Smith b->row = isrow; 264416022c9SBarry Smith b->col = iscol; 2659566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 2669566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 26782bf6240SBarry Smith b->icol = isicol; 2689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 2691393bc97SHong Zhang 2701393bc97SHong Zhang /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 2711393bc97SHong Zhang b->maxnz = b->nz = bi[n]; 2727fd98bd6SLois Curfman McInnes 27357508eceSPierre Jolivet B->factortype = MAT_FACTOR_LU; 27457508eceSPierre Jolivet B->info.factor_mallocs = reallocs; 27557508eceSPierre Jolivet B->info.fill_ratio_given = f; 276703deb49SSatish Balay 2779f7d0b68SHong Zhang if (ai[n]) { 27857508eceSPierre Jolivet B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 279eea2913aSSatish Balay } else { 28057508eceSPierre Jolivet B->info.fill_ratio_needed = 0.0; 281eea2913aSSatish Balay } 28257508eceSPierre Jolivet B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2834d12350bSJunchao Zhang if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 285289bc588SBarry Smith } 286ff6a9541SJacob Faibussowitsch #endif 2871393bc97SHong Zhang 288d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 289d71ae5a4SJacob Faibussowitsch { 290ce3d78c0SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 291ce3d78c0SShri Abhyankar IS isicol; 2928758e1faSBarry Smith const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 2938758e1faSBarry Smith PetscInt i, n = A->rmap->n; 2948758e1faSBarry Smith PetscInt *bi, *bj; 295ce3d78c0SShri Abhyankar PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 296ce3d78c0SShri Abhyankar PetscReal f; 297ce3d78c0SShri Abhyankar PetscInt nlnk, *lnk, k, **bi_ptr; 2980298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 299ce3d78c0SShri Abhyankar PetscBT lnkbt; 300ece542f9SHong Zhang PetscBool missing; 301ce3d78c0SShri Abhyankar 302ce3d78c0SShri Abhyankar PetscFunctionBegin; 30308401ef6SPierre Jolivet PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 3049566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 30528b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 306ece542f9SHong Zhang 3079566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3089566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 3099566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 310ce3d78c0SShri Abhyankar 3111bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 3129f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 3139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 314a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 3159b48462bSShri Abhyankar 3169b48462bSShri Abhyankar /* linked list for storing column indices of the active row */ 3179b48462bSShri Abhyankar nlnk = n + 1; 3189566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3199b48462bSShri Abhyankar 3209566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 3219b48462bSShri Abhyankar 3229b48462bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 3239b48462bSShri Abhyankar f = info->fill; 324aa91b3e7SRichard Tran Mills if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 3259566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 3269b48462bSShri Abhyankar current_space = free_space; 3279b48462bSShri Abhyankar 3289b48462bSShri Abhyankar for (i = 0; i < n; i++) { 3299b48462bSShri Abhyankar /* copy previous fill into linked list */ 3309b48462bSShri Abhyankar nzi = 0; 3319b48462bSShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 3329b48462bSShri Abhyankar ajtmp = aj + ai[r[i]]; 3339566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3349b48462bSShri Abhyankar nzi += nlnk; 3359b48462bSShri Abhyankar 3369b48462bSShri Abhyankar /* add pivot rows into linked list */ 3379b48462bSShri Abhyankar row = lnk[n]; 3389b48462bSShri Abhyankar while (row < i) { 3399b48462bSShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 3409b48462bSShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 3419566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 3429b48462bSShri Abhyankar nzi += nlnk; 3439b48462bSShri Abhyankar row = lnk[row]; 3449b48462bSShri Abhyankar } 3459b48462bSShri Abhyankar bi[i + 1] = bi[i] + nzi; 3469b48462bSShri Abhyankar im[i] = nzi; 3479b48462bSShri Abhyankar 3489b48462bSShri Abhyankar /* mark bdiag */ 3499b48462bSShri Abhyankar nzbd = 0; 3509b48462bSShri Abhyankar nnz = nzi; 3519b48462bSShri Abhyankar k = lnk[n]; 3529b48462bSShri Abhyankar while (nnz-- && k < i) { 3539b48462bSShri Abhyankar nzbd++; 3549b48462bSShri Abhyankar k = lnk[k]; 3559b48462bSShri Abhyankar } 3569b48462bSShri Abhyankar bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 3579b48462bSShri Abhyankar 3589b48462bSShri Abhyankar /* if free space is not available, make more free space */ 3599b48462bSShri Abhyankar if (current_space->local_remaining < nzi) { 3602f18eb33SBarry Smith /* estimated additional space needed */ 3611df4278eSBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 3629566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 3639b48462bSShri Abhyankar reallocs++; 3649b48462bSShri Abhyankar } 3659b48462bSShri Abhyankar 3669b48462bSShri Abhyankar /* copy data into free space, then initialize lnk */ 3679566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 3682205254eSKarl Rupp 3699b48462bSShri Abhyankar bi_ptr[i] = current_space->array; 3709b48462bSShri Abhyankar current_space->array += nzi; 3719b48462bSShri Abhyankar current_space->local_used += nzi; 3729b48462bSShri Abhyankar current_space->local_remaining -= nzi; 3739b48462bSShri Abhyankar } 3749b48462bSShri Abhyankar 3759566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 3769566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 3779b48462bSShri Abhyankar 3789263d837SHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 379*84648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj)); 3809566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 3819566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 3829566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 3839b48462bSShri Abhyankar 3849b48462bSShri Abhyankar /* put together the new matrix */ 3859566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 38657508eceSPierre Jolivet b = (Mat_SeqAIJ *)B->data; 3879b48462bSShri Abhyankar b->free_ij = PETSC_TRUE; 3889f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 3899f0612e4SBarry Smith b->free_a = PETSC_TRUE; 3909b48462bSShri Abhyankar b->j = bj; 3919b48462bSShri Abhyankar b->i = bi; 3929b48462bSShri Abhyankar b->diag = bdiag; 393f4259b30SLisandro Dalcin b->ilen = NULL; 394f4259b30SLisandro Dalcin b->imax = NULL; 3959b48462bSShri Abhyankar b->row = isrow; 3969b48462bSShri Abhyankar b->col = iscol; 3979566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 3989566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 3999b48462bSShri Abhyankar b->icol = isicol; 400*84648c2dSPierre Jolivet PetscCall(PetscMalloc1(n, &b->solve_work)); 4019b48462bSShri Abhyankar 4029b48462bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 4039b48462bSShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 4042205254eSKarl Rupp 405d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 406f284f12aSHong Zhang B->info.factor_mallocs = reallocs; 407f284f12aSHong Zhang B->info.fill_ratio_given = f; 4089b48462bSShri Abhyankar 4099b48462bSShri Abhyankar if (ai[n]) { 410f284f12aSHong Zhang B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 4119b48462bSShri Abhyankar } else { 412f284f12aSHong Zhang B->info.fill_ratio_needed = 0.0; 4139b48462bSShri Abhyankar } 4149263d837SHong Zhang #if defined(PETSC_USE_INFO) 4159263d837SHong Zhang if (ai[n] != 0) { 4169263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 4179566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 4189566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 4199566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 4209566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 4219263d837SHong Zhang } else { 4229566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 4239263d837SHong Zhang } 4249263d837SHong Zhang #endif 42535233d99SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 4264d12350bSJunchao Zhang if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 4279566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 4283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4299b48462bSShri Abhyankar } 4309b48462bSShri Abhyankar 4315b5bc046SBarry Smith /* 4325b5bc046SBarry Smith Trouble in factorization, should we dump the original matrix? 4335b5bc046SBarry Smith */ 434d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorDumpMatrix(Mat A) 435d71ae5a4SJacob Faibussowitsch { 436ace3abfcSBarry Smith PetscBool flg = PETSC_FALSE; 4375b5bc046SBarry Smith 4385b5bc046SBarry Smith PetscFunctionBegin; 4399566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 4405b5bc046SBarry Smith if (flg) { 4415b5bc046SBarry Smith PetscViewer viewer; 4425b5bc046SBarry Smith char filename[PETSC_MAX_PATH_LEN]; 4435b5bc046SBarry Smith 4449566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 4459566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 4469566063dSJacob Faibussowitsch PetscCall(MatView(A, viewer)); 4479566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&viewer)); 4485b5bc046SBarry Smith } 4493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4505b5bc046SBarry Smith } 4515b5bc046SBarry Smith 452d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 453d71ae5a4SJacob Faibussowitsch { 454c9c72f8fSHong Zhang Mat C = B; 455c9c72f8fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 456c9c72f8fSHong Zhang IS isrow = b->row, isicol = b->icol; 457c9c72f8fSHong Zhang const PetscInt *r, *ic, *ics; 458bbd65245SShri Abhyankar const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 459bbd65245SShri Abhyankar PetscInt i, j, k, nz, nzL, row, *pj; 460bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp; 461bbd65245SShri Abhyankar MatScalar *rtmp, *pc, multiplier, *pv; 462b65878eeSJunchao Zhang const MatScalar *aa, *v; 463b65878eeSJunchao Zhang MatScalar *ba; 464ace3abfcSBarry Smith PetscBool row_identity, col_identity; 465d90ac83dSHong Zhang FactorShiftCtx sctx; 4664f81c4b7SBarry Smith const PetscInt *ddiag; 467d4ad06f7SHong Zhang PetscReal rs; 468d4ad06f7SHong Zhang MatScalar d; 469d4ad06f7SHong Zhang 470c9c72f8fSHong Zhang PetscFunctionBegin; 471b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 472b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayWrite(B, &ba)); 473d6e5152cSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 4749566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 475d4ad06f7SHong Zhang 476f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 477d4ad06f7SHong Zhang ddiag = a->diag; 478d4ad06f7SHong Zhang sctx.shift_top = info->zeropivot; 479d4ad06f7SHong Zhang for (i = 0; i < n; i++) { 480d4ad06f7SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 481d4ad06f7SHong Zhang d = (aa)[ddiag[i]]; 482d4ad06f7SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 483d4ad06f7SHong Zhang v = aa + ai[i]; 484d4ad06f7SHong Zhang nz = ai[i + 1] - ai[i]; 4852205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 486d4ad06f7SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 487d4ad06f7SHong Zhang } 488d4ad06f7SHong Zhang sctx.shift_top *= 1.1; 489d4ad06f7SHong Zhang sctx.nshift_max = 5; 490d4ad06f7SHong Zhang sctx.shift_lo = 0.; 491d4ad06f7SHong Zhang sctx.shift_hi = 1.; 492d4ad06f7SHong Zhang } 493d4ad06f7SHong Zhang 4949566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 4959566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 4969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 497c9c72f8fSHong Zhang ics = ic; 498c9c72f8fSHong Zhang 499d4ad06f7SHong Zhang do { 50007b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 501c9c72f8fSHong Zhang for (i = 0; i < n; i++) { 502c9c72f8fSHong Zhang /* zero rtmp */ 503c9c72f8fSHong Zhang /* L part */ 504c9c72f8fSHong Zhang nz = bi[i + 1] - bi[i]; 505c9c72f8fSHong Zhang bjtmp = bj + bi[i]; 506c9c72f8fSHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 507c9c72f8fSHong Zhang 508c9c72f8fSHong Zhang /* U part */ 50962fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 51062fbd6afSShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 511813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 512813a1f2bSShri Abhyankar 513813a1f2bSShri Abhyankar /* load in initial (unfactored row) */ 514813a1f2bSShri Abhyankar nz = ai[r[i] + 1] - ai[r[i]]; 515813a1f2bSShri Abhyankar ajtmp = aj + ai[r[i]]; 516813a1f2bSShri Abhyankar v = aa + ai[r[i]]; 517ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 518813a1f2bSShri Abhyankar /* ZeropivotApply() */ 51998a93161SHong Zhang rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 520813a1f2bSShri Abhyankar 521813a1f2bSShri Abhyankar /* elimination */ 522813a1f2bSShri Abhyankar bjtmp = bj + bi[i]; 523813a1f2bSShri Abhyankar row = *bjtmp++; 524813a1f2bSShri Abhyankar nzL = bi[i + 1] - bi[i]; 525f268cba8SShri Abhyankar for (k = 0; k < nzL; k++) { 526813a1f2bSShri Abhyankar pc = rtmp + row; 527813a1f2bSShri Abhyankar if (*pc != 0.0) { 528b65878eeSJunchao Zhang pv = ba + bdiag[row]; 529813a1f2bSShri Abhyankar multiplier = *pc * (*pv); 530813a1f2bSShri Abhyankar *pc = multiplier; 5312205254eSKarl Rupp 53262fbd6afSShri Abhyankar pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 533b65878eeSJunchao Zhang pv = ba + bdiag[row + 1] + 1; 53462fbd6afSShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 5352205254eSKarl Rupp 536813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 5379566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 538813a1f2bSShri Abhyankar } 539f268cba8SShri Abhyankar row = *bjtmp++; 540813a1f2bSShri Abhyankar } 541813a1f2bSShri Abhyankar 542813a1f2bSShri Abhyankar /* finished row so stick it into b->a */ 543813a1f2bSShri Abhyankar rs = 0.0; 544813a1f2bSShri Abhyankar /* L part */ 545b65878eeSJunchao Zhang pv = ba + bi[i]; 546813a1f2bSShri Abhyankar pj = b->j + bi[i]; 547813a1f2bSShri Abhyankar nz = bi[i + 1] - bi[i]; 548813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) { 5499371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 5509371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 551813a1f2bSShri Abhyankar } 552813a1f2bSShri Abhyankar 553813a1f2bSShri Abhyankar /* U part */ 554b65878eeSJunchao Zhang pv = ba + bdiag[i + 1] + 1; 55562fbd6afSShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 55662fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 557813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) { 5589371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 5599371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 560813a1f2bSShri Abhyankar } 561813a1f2bSShri Abhyankar 562813a1f2bSShri Abhyankar sctx.rs = rs; 563813a1f2bSShri Abhyankar sctx.pv = rtmp[i]; 5649566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 56507b50cabSHong Zhang if (sctx.newshift) break; /* break for-loop */ 56607b50cabSHong Zhang rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 567813a1f2bSShri Abhyankar 568a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 569b65878eeSJunchao Zhang pv = ba + bdiag[i]; 570813a1f2bSShri Abhyankar *pv = 1.0 / rtmp[i]; 571813a1f2bSShri Abhyankar 572813a1f2bSShri Abhyankar } /* endof for (i=0; i<n; i++) { */ 573813a1f2bSShri Abhyankar 5748ff23777SHong Zhang /* MatPivotRefine() */ 57507b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 576813a1f2bSShri Abhyankar /* 577813a1f2bSShri Abhyankar * if no shift in this attempt & shifting & started shifting & can refine, 578813a1f2bSShri Abhyankar * then try lower shift 579813a1f2bSShri Abhyankar */ 580813a1f2bSShri Abhyankar sctx.shift_hi = sctx.shift_fraction; 581813a1f2bSShri Abhyankar sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 582813a1f2bSShri Abhyankar sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 58307b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 584813a1f2bSShri Abhyankar sctx.nshift++; 585813a1f2bSShri Abhyankar } 58607b50cabSHong Zhang } while (sctx.newshift); 587813a1f2bSShri Abhyankar 588b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 589b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba)); 590b65878eeSJunchao Zhang 5919566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 5929566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 5939566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 594d3ac4fa3SBarry Smith 5959566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 5969566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 5974d12350bSJunchao Zhang if (b->inode.size_csr) { 598abb87a52SBarry Smith C->ops->solve = MatSolve_SeqAIJ_Inode; 599abb87a52SBarry Smith } else if (row_identity && col_identity) { 60035233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 601813a1f2bSShri Abhyankar } else { 60235233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ; 603813a1f2bSShri Abhyankar } 60435233d99SShri Abhyankar C->ops->solveadd = MatSolveAdd_SeqAIJ; 60535233d99SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 60635233d99SShri Abhyankar C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 60735233d99SShri Abhyankar C->ops->matsolve = MatMatSolve_SeqAIJ; 608a3d9026eSPierre Jolivet C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ; 609813a1f2bSShri Abhyankar C->assembled = PETSC_TRUE; 610813a1f2bSShri Abhyankar C->preallocated = PETSC_TRUE; 6112205254eSKarl Rupp 6129566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 613813a1f2bSShri Abhyankar 61414d2772eSHong Zhang /* MatShiftView(A,info,&sctx) */ 615813a1f2bSShri Abhyankar if (sctx.nshift) { 616f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 6179566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 618f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 6199566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 620f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 6219566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 622813a1f2bSShri Abhyankar } 623813a1f2bSShri Abhyankar } 6243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 625813a1f2bSShri Abhyankar } 626813a1f2bSShri Abhyankar 627ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 628ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 629ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 630ff6a9541SJacob Faibussowitsch 631d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 632d71ae5a4SJacob Faibussowitsch { 633719d5645SBarry Smith Mat C = B; 634416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 63582bf6240SBarry Smith IS isrow = b->row, isicol = b->icol; 6365d0c19d7SBarry Smith const PetscInt *r, *ic, *ics; 637d278edc7SBarry Smith PetscInt nz, row, i, j, n = A->rmap->n, diag; 638d278edc7SBarry Smith const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 639d278edc7SBarry Smith const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 640d278edc7SBarry Smith MatScalar *pv, *rtmp, *pc, multiplier, d; 641b65878eeSJunchao Zhang const MatScalar *v, *aa; 642b65878eeSJunchao Zhang MatScalar *ba; 64333f9893dSHong Zhang PetscReal rs = 0.0; 644d90ac83dSHong Zhang FactorShiftCtx sctx; 6454f81c4b7SBarry Smith const PetscInt *ddiag; 646ace3abfcSBarry Smith PetscBool row_identity, col_identity; 647289bc588SBarry Smith 6483a40ed3dSBarry Smith PetscFunctionBegin; 649b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 650b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayWrite(B, &ba)); 651504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 6529566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 653ada7143aSSatish Balay 654f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 65542898933SHong Zhang ddiag = a->diag; 6569f7d0b68SHong Zhang sctx.shift_top = info->zeropivot; 6576cc28720Svictorle for (i = 0; i < n; i++) { 6589f95998fSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 6599f7d0b68SHong Zhang d = (aa)[ddiag[i]]; 6601a890ab1SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 6619f7d0b68SHong Zhang v = aa + ai[i]; 6629f7d0b68SHong Zhang nz = ai[i + 1] - ai[i]; 6632205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 6641a890ab1SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 6656cc28720Svictorle } 666118f5789SBarry Smith sctx.shift_top *= 1.1; 667d2276718SHong Zhang sctx.nshift_max = 5; 668d2276718SHong Zhang sctx.shift_lo = 0.; 669d2276718SHong Zhang sctx.shift_hi = 1.; 670a0a356efSHong Zhang } 671a0a356efSHong Zhang 6729566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 6739566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 6749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 675504a3fadSHong Zhang ics = ic; 676504a3fadSHong Zhang 677085256b3SBarry Smith do { 67807b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 679289bc588SBarry Smith for (i = 0; i < n; i++) { 680b3bf805bSHong Zhang nz = bi[i + 1] - bi[i]; 681b3bf805bSHong Zhang bjtmp = bj + bi[i]; 6829f7d0b68SHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 683289bc588SBarry Smith 684289bc588SBarry Smith /* load in initial (unfactored row) */ 6859f7d0b68SHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 6869f7d0b68SHong Zhang ajtmp = aj + ai[r[i]]; 6879f7d0b68SHong Zhang v = aa + ai[r[i]]; 688ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 689d2276718SHong Zhang rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 690289bc588SBarry Smith 691b3bf805bSHong Zhang row = *bjtmp++; 692f2582111SSatish Balay while (row < i) { 6938c37ef55SBarry Smith pc = rtmp + row; 6948c37ef55SBarry Smith if (*pc != 0.0) { 695b65878eeSJunchao Zhang pv = ba + diag_offset[row]; 696010a20caSHong Zhang pj = b->j + diag_offset[row] + 1; 69735aab85fSBarry Smith multiplier = *pc / *pv++; 6988c37ef55SBarry Smith *pc = multiplier; 699b3bf805bSHong Zhang nz = bi[row + 1] - diag_offset[row] - 1; 7009f7d0b68SHong Zhang for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 7019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 702289bc588SBarry Smith } 703b3bf805bSHong Zhang row = *bjtmp++; 704289bc588SBarry Smith } 705416022c9SBarry Smith /* finished row so stick it into b->a */ 706b65878eeSJunchao Zhang pv = ba + bi[i]; 707b3bf805bSHong Zhang pj = b->j + bi[i]; 708b3bf805bSHong Zhang nz = bi[i + 1] - bi[i]; 709b3bf805bSHong Zhang diag = diag_offset[i] - bi[i]; 710e57ff13aSBarry Smith rs = 0.0; 711d3d32019SBarry Smith for (j = 0; j < nz; j++) { 7129f7d0b68SHong Zhang pv[j] = rtmp[pj[j]]; 7139f7d0b68SHong Zhang rs += PetscAbsScalar(pv[j]); 714d3d32019SBarry Smith } 715e57ff13aSBarry Smith rs -= PetscAbsScalar(pv[diag]); 716d2276718SHong Zhang 717d2276718SHong Zhang sctx.rs = rs; 718d2276718SHong Zhang sctx.pv = pv[diag]; 7199566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 72007b50cabSHong Zhang if (sctx.newshift) break; 721504a3fadSHong Zhang pv[diag] = sctx.pv; 72235aab85fSBarry Smith } 723d2276718SHong Zhang 72407b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 7256cc28720Svictorle /* 7266c037d1bSvictorle * if no shift in this attempt & shifting & started shifting & can refine, 7276cc28720Svictorle * then try lower shift 7286cc28720Svictorle */ 7290481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 7300481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 7310481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 73207b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 733d2276718SHong Zhang sctx.nshift++; 7346cc28720Svictorle } 73507b50cabSHong Zhang } while (sctx.newshift); 7360f11f9aeSSatish Balay 737a5b23f4aSJose E. Roman /* invert diagonal entries for simpler triangular solves */ 738b65878eeSJunchao Zhang for (i = 0; i < n; i++) ba[diag_offset[i]] = 1.0 / ba[diag_offset[i]]; 7399566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 7409566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 7419566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 742b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 743b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba)); 744d3ac4fa3SBarry Smith 7459566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 7469566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 7478d8c7c43SBarry Smith if (row_identity && col_identity) { 748ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 7498d8c7c43SBarry Smith } else { 750ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_inplace; 751db4efbfdSBarry Smith } 752ad04f41aSHong Zhang C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 753ad04f41aSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 754ad04f41aSHong Zhang C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 755ad04f41aSHong Zhang C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 756a3d9026eSPierre Jolivet C->ops->matsolvetranspose = NULL; 7572205254eSKarl Rupp 758c456f294SBarry Smith C->assembled = PETSC_TRUE; 7595c9eb25fSBarry Smith C->preallocated = PETSC_TRUE; 7602205254eSKarl Rupp 7619566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 762d2276718SHong Zhang if (sctx.nshift) { 763f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 7649566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 765f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 7669566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 7676cc28720Svictorle } 7680697b6caSHong Zhang } 76957508eceSPierre Jolivet C->ops->solve = MatSolve_SeqAIJ_inplace; 77057508eceSPierre Jolivet C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 7712205254eSKarl Rupp 7729566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode(C)); 7733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 774289bc588SBarry Smith } 7750889b5dcSvictorle 776ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 777ff6a9541SJacob Faibussowitsch 778137fb511SHong Zhang /* 779137fb511SHong Zhang This routine implements inplace ILU(0) with row or/and column permutations. 780137fb511SHong Zhang Input: 781137fb511SHong Zhang A - original matrix 782137fb511SHong Zhang Output; 783137fb511SHong Zhang A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 784137fb511SHong Zhang a->j (col index) is permuted by the inverse of colperm, then sorted 785137fb511SHong Zhang a->a reordered accordingly with a->j 786137fb511SHong Zhang a->diag (ptr to diagonal elements) is updated. 787137fb511SHong Zhang */ 788d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 789d71ae5a4SJacob Faibussowitsch { 790b051339dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 791b051339dSHong Zhang IS isrow = a->row, isicol = a->icol; 7925d0c19d7SBarry Smith const PetscInt *r, *ic, *ics; 7935d0c19d7SBarry Smith PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 7945d0c19d7SBarry Smith PetscInt *ajtmp, nz, row; 795b051339dSHong Zhang PetscInt *diag = a->diag, nbdiag, *pj; 796a77337e4SBarry Smith PetscScalar *rtmp, *pc, multiplier, d; 797504a3fadSHong Zhang MatScalar *pv, *v; 798137fb511SHong Zhang PetscReal rs; 799d90ac83dSHong Zhang FactorShiftCtx sctx; 800b65878eeSJunchao Zhang MatScalar *aa, *vtmp; 801137fb511SHong Zhang 802137fb511SHong Zhang PetscFunctionBegin; 80308401ef6SPierre Jolivet PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 804504a3fadSHong Zhang 805b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArray(A, &aa)); 806504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 8079566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 808504a3fadSHong Zhang 809504a3fadSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 810504a3fadSHong Zhang const PetscInt *ddiag = a->diag; 811504a3fadSHong Zhang sctx.shift_top = info->zeropivot; 812504a3fadSHong Zhang for (i = 0; i < n; i++) { 813504a3fadSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 814504a3fadSHong Zhang d = (aa)[ddiag[i]]; 815504a3fadSHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 816504a3fadSHong Zhang vtmp = aa + ai[i]; 817504a3fadSHong Zhang nz = ai[i + 1] - ai[i]; 8182205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 819504a3fadSHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 820504a3fadSHong Zhang } 821504a3fadSHong Zhang sctx.shift_top *= 1.1; 822504a3fadSHong Zhang sctx.nshift_max = 5; 823504a3fadSHong Zhang sctx.shift_lo = 0.; 824504a3fadSHong Zhang sctx.shift_hi = 1.; 825504a3fadSHong Zhang } 826504a3fadSHong Zhang 8279566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 8289566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 8299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 8309566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, n + 1)); 831b051339dSHong Zhang ics = ic; 832137fb511SHong Zhang 833504a3fadSHong Zhang #if defined(MV) 83475567043SBarry Smith sctx.shift_top = 0.; 835e60cf9a0SBarry Smith sctx.nshift_max = 0; 83675567043SBarry Smith sctx.shift_lo = 0.; 83775567043SBarry Smith sctx.shift_hi = 0.; 83875567043SBarry Smith sctx.shift_fraction = 0.; 839137fb511SHong Zhang 840f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 84175567043SBarry Smith sctx.shift_top = 0.; 842137fb511SHong Zhang for (i = 0; i < n; i++) { 843137fb511SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 844b65878eeSJunchao Zhang d = aa[diag[i]]; 845137fb511SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 846b65878eeSJunchao Zhang v = aa + ai[i]; 847b051339dSHong Zhang nz = ai[i + 1] - ai[i]; 8482205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 849137fb511SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 850137fb511SHong Zhang } 851137fb511SHong Zhang if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 852137fb511SHong Zhang sctx.shift_top *= 1.1; 853137fb511SHong Zhang sctx.nshift_max = 5; 854137fb511SHong Zhang sctx.shift_lo = 0.; 855137fb511SHong Zhang sctx.shift_hi = 1.; 856137fb511SHong Zhang } 857137fb511SHong Zhang 85875567043SBarry Smith sctx.shift_amount = 0.; 859137fb511SHong Zhang sctx.nshift = 0; 860504a3fadSHong Zhang #endif 861504a3fadSHong Zhang 862137fb511SHong Zhang do { 86307b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 864137fb511SHong Zhang for (i = 0; i < n; i++) { 865b051339dSHong Zhang /* load in initial unfactored row */ 866b051339dSHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 867b051339dSHong Zhang ajtmp = aj + ai[r[i]]; 868b65878eeSJunchao Zhang v = aa + ai[r[i]]; 869137fb511SHong Zhang /* sort permuted ajtmp and values v accordingly */ 870b051339dSHong Zhang for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 8719566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 872137fb511SHong Zhang 873b051339dSHong Zhang diag[r[i]] = ai[r[i]]; 874137fb511SHong Zhang for (j = 0; j < nz; j++) { 875137fb511SHong Zhang rtmp[ajtmp[j]] = v[j]; 876b051339dSHong Zhang if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 877137fb511SHong Zhang } 878137fb511SHong Zhang rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 879137fb511SHong Zhang 880b051339dSHong Zhang row = *ajtmp++; 881137fb511SHong Zhang while (row < i) { 882137fb511SHong Zhang pc = rtmp + row; 883137fb511SHong Zhang if (*pc != 0.0) { 884b65878eeSJunchao Zhang pv = aa + diag[r[row]]; 885b051339dSHong Zhang pj = aj + diag[r[row]] + 1; 886137fb511SHong Zhang 887137fb511SHong Zhang multiplier = *pc / *pv++; 888137fb511SHong Zhang *pc = multiplier; 889b051339dSHong Zhang nz = ai[r[row] + 1] - diag[r[row]] - 1; 890b051339dSHong Zhang for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 8919566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 892137fb511SHong Zhang } 893b051339dSHong Zhang row = *ajtmp++; 894137fb511SHong Zhang } 895b65878eeSJunchao Zhang /* finished row so overwrite it onto aa */ 896b65878eeSJunchao Zhang pv = aa + ai[r[i]]; 897b051339dSHong Zhang pj = aj + ai[r[i]]; 898b051339dSHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 899b051339dSHong Zhang nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 900137fb511SHong Zhang 901137fb511SHong Zhang rs = 0.0; 902137fb511SHong Zhang for (j = 0; j < nz; j++) { 903b051339dSHong Zhang pv[j] = rtmp[pj[j]]; 904b051339dSHong Zhang if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 905137fb511SHong Zhang } 906137fb511SHong Zhang 907137fb511SHong Zhang sctx.rs = rs; 908b051339dSHong Zhang sctx.pv = pv[nbdiag]; 9099566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 91007b50cabSHong Zhang if (sctx.newshift) break; 911504a3fadSHong Zhang pv[nbdiag] = sctx.pv; 912137fb511SHong Zhang } 913137fb511SHong Zhang 91407b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 915137fb511SHong Zhang /* 916137fb511SHong Zhang * if no shift in this attempt & shifting & started shifting & can refine, 917137fb511SHong Zhang * then try lower shift 918137fb511SHong Zhang */ 9190481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 9200481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 9210481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 92207b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 923137fb511SHong Zhang sctx.nshift++; 924137fb511SHong Zhang } 92507b50cabSHong Zhang } while (sctx.newshift); 926137fb511SHong Zhang 927a5b23f4aSJose E. Roman /* invert diagonal entries for simpler triangular solves */ 928b65878eeSJunchao Zhang for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]]; 929137fb511SHong Zhang 930b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArray(A, &aa)); 9319566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 9329566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 9339566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 9342205254eSKarl Rupp 935b051339dSHong Zhang A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 936ad04f41aSHong Zhang A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 937ad04f41aSHong Zhang A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 938ad04f41aSHong Zhang A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 9392205254eSKarl Rupp 940b051339dSHong Zhang A->assembled = PETSC_TRUE; 9415c9eb25fSBarry Smith A->preallocated = PETSC_TRUE; 9422205254eSKarl Rupp 9439566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(A->cmap->n)); 944137fb511SHong Zhang if (sctx.nshift) { 945f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 9469566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 947f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 9489566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 949137fb511SHong Zhang } 950137fb511SHong Zhang } 9513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 952137fb511SHong Zhang } 953137fb511SHong Zhang 954d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 955d71ae5a4SJacob Faibussowitsch { 956416022c9SBarry Smith Mat C; 957416022c9SBarry Smith 9583a40ed3dSBarry Smith PetscFunctionBegin; 9599566063dSJacob Faibussowitsch PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 9609566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 9619566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(C, A, info)); 9622205254eSKarl Rupp 963db4efbfdSBarry Smith A->ops->solve = C->ops->solve; 964db4efbfdSBarry Smith A->ops->solvetranspose = C->ops->solvetranspose; 9652205254eSKarl Rupp 9669566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 9673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 968da3a660dSBarry Smith } 9691d098868SHong Zhang 970d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 971d71ae5a4SJacob Faibussowitsch { 972416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 973416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 9745d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 9755d0c19d7SBarry Smith PetscInt nz; 9765d0c19d7SBarry Smith const PetscInt *rout, *cout, *r, *c; 977dd6ea824SBarry Smith PetscScalar *x, *tmp, *tmps, sum; 978d9fead3dSBarry Smith const PetscScalar *b; 979b65878eeSJunchao Zhang const MatScalar *aa, *v; 9808c37ef55SBarry Smith 9813a40ed3dSBarry Smith PetscFunctionBegin; 9823ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 98391bf9adeSBarry Smith 984b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 9859566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 9869566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 987416022c9SBarry Smith tmp = a->solve_work; 9888c37ef55SBarry Smith 9899371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 9909371c9d4SSatish Balay r = rout; 9919371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 9929371c9d4SSatish Balay c = cout + (n - 1); 9938c37ef55SBarry Smith 9948c37ef55SBarry Smith /* forward solve the lower triangular */ 9958c37ef55SBarry Smith tmp[0] = b[*r++]; 996010a20caSHong Zhang tmps = tmp; 9978c37ef55SBarry Smith for (i = 1; i < n; i++) { 998010a20caSHong Zhang v = aa + ai[i]; 999010a20caSHong Zhang vi = aj + ai[i]; 100053e7425aSBarry Smith nz = a->diag[i] - ai[i]; 100153e7425aSBarry Smith sum = b[*r++]; 1002003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 10038c37ef55SBarry Smith tmp[i] = sum; 10048c37ef55SBarry Smith } 10058c37ef55SBarry Smith 10068c37ef55SBarry Smith /* backward solve the upper triangular */ 10078c37ef55SBarry Smith for (i = n - 1; i >= 0; i--) { 1008010a20caSHong Zhang v = aa + a->diag[i] + 1; 1009010a20caSHong Zhang vi = aj + a->diag[i] + 1; 1010416022c9SBarry Smith nz = ai[i + 1] - a->diag[i] - 1; 10118c37ef55SBarry Smith sum = tmp[i]; 1012003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1013010a20caSHong Zhang x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 10148c37ef55SBarry Smith } 1015b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 10169566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 10179566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 10189566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 10199566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 10209566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 10213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10228c37ef55SBarry Smith } 1023026e39d0SSatish Balay 1024ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 1025d71ae5a4SJacob Faibussowitsch { 10262bebee5dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 10272bebee5dSHong Zhang IS iscol = a->col, isrow = a->row; 10285d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1029910cf402Sprj- PetscInt nz, neq, ldb, ldx; 10305d0c19d7SBarry Smith const PetscInt *rout, *cout, *r, *c; 1031910cf402Sprj- PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 1032b65878eeSJunchao Zhang const PetscScalar *b, *aa, *v; 1033910cf402Sprj- PetscBool isdense; 10342bebee5dSHong Zhang 10352bebee5dSHong Zhang PetscFunctionBegin; 10363ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 10379566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 103828b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1039f9fb9879SHong Zhang if (X != B) { 10409566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 104128b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1042f9fb9879SHong Zhang } 1043b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 10449566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 10459566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &ldb)); 10469566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 10479566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(X, &ldx)); 10489371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 10499371c9d4SSatish Balay r = rout; 10509371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 10519371c9d4SSatish Balay c = cout; 1052a36b22ccSSatish Balay for (neq = 0; neq < B->cmap->n; neq++) { 10532bebee5dSHong Zhang /* forward solve the lower triangular */ 10542bebee5dSHong Zhang tmp[0] = b[r[0]]; 10552bebee5dSHong Zhang tmps = tmp; 10562bebee5dSHong Zhang for (i = 1; i < n; i++) { 10572bebee5dSHong Zhang v = aa + ai[i]; 10582bebee5dSHong Zhang vi = aj + ai[i]; 10592bebee5dSHong Zhang nz = a->diag[i] - ai[i]; 10602bebee5dSHong Zhang sum = b[r[i]]; 1061003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 10622bebee5dSHong Zhang tmp[i] = sum; 10632bebee5dSHong Zhang } 10642bebee5dSHong Zhang /* backward solve the upper triangular */ 10652bebee5dSHong Zhang for (i = n - 1; i >= 0; i--) { 10662bebee5dSHong Zhang v = aa + a->diag[i] + 1; 10672bebee5dSHong Zhang vi = aj + a->diag[i] + 1; 10682bebee5dSHong Zhang nz = ai[i + 1] - a->diag[i] - 1; 10692bebee5dSHong Zhang sum = tmp[i]; 1070003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 10712bebee5dSHong Zhang x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 10722bebee5dSHong Zhang } 1073910cf402Sprj- b += ldb; 1074910cf402Sprj- x += ldx; 10752bebee5dSHong Zhang } 1076b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 10779566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 10789566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 10799566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 10809566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 10819566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 10823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10832bebee5dSHong Zhang } 10842bebee5dSHong Zhang 1085d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 1086d71ae5a4SJacob Faibussowitsch { 10879bd0847aSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 10889bd0847aSShri Abhyankar IS iscol = a->col, isrow = a->row; 10899bd0847aSShri Abhyankar PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1090910cf402Sprj- PetscInt nz, neq, ldb, ldx; 10919bd0847aSShri Abhyankar const PetscInt *rout, *cout, *r, *c; 1092910cf402Sprj- PetscScalar *x, *tmp = a->solve_work, sum; 1093b65878eeSJunchao Zhang const PetscScalar *b, *aa, *v; 1094910cf402Sprj- PetscBool isdense; 10959bd0847aSShri Abhyankar 10969bd0847aSShri Abhyankar PetscFunctionBegin; 10973ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 10989566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 109928b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1100f9fb9879SHong Zhang if (X != B) { 11019566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 110228b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1103f9fb9879SHong Zhang } 1104b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 11059566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 11069566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &ldb)); 11079566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 11089566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(X, &ldx)); 11099371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 11109371c9d4SSatish Balay r = rout; 11119371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 11129371c9d4SSatish Balay c = cout; 11139bd0847aSShri Abhyankar for (neq = 0; neq < B->cmap->n; neq++) { 11149bd0847aSShri Abhyankar /* forward solve the lower triangular */ 11159bd0847aSShri Abhyankar tmp[0] = b[r[0]]; 11169bd0847aSShri Abhyankar v = aa; 11179bd0847aSShri Abhyankar vi = aj; 11189bd0847aSShri Abhyankar for (i = 1; i < n; i++) { 11199bd0847aSShri Abhyankar nz = ai[i + 1] - ai[i]; 11209bd0847aSShri Abhyankar sum = b[r[i]]; 11219bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 11229bd0847aSShri Abhyankar tmp[i] = sum; 11239371c9d4SSatish Balay v += nz; 11249371c9d4SSatish Balay vi += nz; 11259bd0847aSShri Abhyankar } 11269bd0847aSShri Abhyankar /* backward solve the upper triangular */ 11279bd0847aSShri Abhyankar for (i = n - 1; i >= 0; i--) { 11289bd0847aSShri Abhyankar v = aa + adiag[i + 1] + 1; 11299bd0847aSShri Abhyankar vi = aj + adiag[i + 1] + 1; 11309bd0847aSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 11319bd0847aSShri Abhyankar sum = tmp[i]; 11329bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 11339bd0847aSShri Abhyankar x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 11349bd0847aSShri Abhyankar } 1135910cf402Sprj- b += ldb; 1136910cf402Sprj- x += ldx; 11379bd0847aSShri Abhyankar } 1138b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 11399566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 11409566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 11419566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 11429566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 11439566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 11443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11459bd0847aSShri Abhyankar } 11469bd0847aSShri Abhyankar 1147a3d9026eSPierre Jolivet PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X) 1148a3d9026eSPierre Jolivet { 1149a3d9026eSPierre Jolivet Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1150a3d9026eSPierre Jolivet IS iscol = a->col, isrow = a->row; 1151a3d9026eSPierre Jolivet PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j; 1152a3d9026eSPierre Jolivet PetscInt nz, neq, ldb, ldx; 1153a3d9026eSPierre Jolivet const PetscInt *rout, *cout, *r, *c; 1154a3d9026eSPierre Jolivet PetscScalar *x, *tmp = a->solve_work, s1; 1155b65878eeSJunchao Zhang const PetscScalar *b, *aa, *v; 1156a3d9026eSPierre Jolivet PetscBool isdense; 1157a3d9026eSPierre Jolivet 1158a3d9026eSPierre Jolivet PetscFunctionBegin; 1159a3d9026eSPierre Jolivet if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1160a3d9026eSPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1161a3d9026eSPierre Jolivet PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1162a3d9026eSPierre Jolivet if (X != B) { 1163a3d9026eSPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1164a3d9026eSPierre Jolivet PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1165a3d9026eSPierre Jolivet } 1166b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1167a3d9026eSPierre Jolivet PetscCall(MatDenseGetArrayRead(B, &b)); 1168a3d9026eSPierre Jolivet PetscCall(MatDenseGetLDA(B, &ldb)); 1169a3d9026eSPierre Jolivet PetscCall(MatDenseGetArray(X, &x)); 1170a3d9026eSPierre Jolivet PetscCall(MatDenseGetLDA(X, &ldx)); 1171a3d9026eSPierre Jolivet PetscCall(ISGetIndices(isrow, &rout)); 1172a3d9026eSPierre Jolivet r = rout; 1173a3d9026eSPierre Jolivet PetscCall(ISGetIndices(iscol, &cout)); 1174a3d9026eSPierre Jolivet c = cout; 1175a3d9026eSPierre Jolivet for (neq = 0; neq < B->cmap->n; neq++) { 1176a3d9026eSPierre Jolivet /* copy the b into temp work space according to permutation */ 1177a3d9026eSPierre Jolivet for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1178a3d9026eSPierre Jolivet 1179a3d9026eSPierre Jolivet /* forward solve the U^T */ 1180a3d9026eSPierre Jolivet for (i = 0; i < n; i++) { 1181a3d9026eSPierre Jolivet v = aa + adiag[i + 1] + 1; 1182a3d9026eSPierre Jolivet vi = aj + adiag[i + 1] + 1; 1183a3d9026eSPierre Jolivet nz = adiag[i] - adiag[i + 1] - 1; 1184a3d9026eSPierre Jolivet s1 = tmp[i]; 1185a3d9026eSPierre Jolivet s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1186a3d9026eSPierre Jolivet for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1187a3d9026eSPierre Jolivet tmp[i] = s1; 1188a3d9026eSPierre Jolivet } 1189a3d9026eSPierre Jolivet 1190a3d9026eSPierre Jolivet /* backward solve the L^T */ 1191a3d9026eSPierre Jolivet for (i = n - 1; i >= 0; i--) { 1192a3d9026eSPierre Jolivet v = aa + ai[i]; 1193a3d9026eSPierre Jolivet vi = aj + ai[i]; 1194a3d9026eSPierre Jolivet nz = ai[i + 1] - ai[i]; 1195a3d9026eSPierre Jolivet s1 = tmp[i]; 1196a3d9026eSPierre Jolivet for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1197a3d9026eSPierre Jolivet } 1198a3d9026eSPierre Jolivet 1199a3d9026eSPierre Jolivet /* copy tmp into x according to permutation */ 1200a3d9026eSPierre Jolivet for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1201a3d9026eSPierre Jolivet b += ldb; 1202a3d9026eSPierre Jolivet x += ldx; 1203a3d9026eSPierre Jolivet } 1204b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1205a3d9026eSPierre Jolivet PetscCall(ISRestoreIndices(isrow, &rout)); 1206a3d9026eSPierre Jolivet PetscCall(ISRestoreIndices(iscol, &cout)); 1207a3d9026eSPierre Jolivet PetscCall(MatDenseRestoreArrayRead(B, &b)); 1208a3d9026eSPierre Jolivet PetscCall(MatDenseRestoreArray(X, &x)); 1209a3d9026eSPierre Jolivet PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1210a3d9026eSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 1211a3d9026eSPierre Jolivet } 1212a3d9026eSPierre Jolivet 1213ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 1214d71ae5a4SJacob Faibussowitsch { 1215137fb511SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1216137fb511SHong Zhang IS iscol = a->col, isrow = a->row; 12175d0c19d7SBarry Smith const PetscInt *r, *c, *rout, *cout; 12185d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 12195d0c19d7SBarry Smith PetscInt nz, row; 1220fdc842d1SBarry Smith PetscScalar *x, *tmp, *tmps, sum; 1221fdc842d1SBarry Smith const PetscScalar *b; 1222b65878eeSJunchao Zhang const MatScalar *aa, *v; 1223137fb511SHong Zhang 1224137fb511SHong Zhang PetscFunctionBegin; 12253ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1226137fb511SHong Zhang 1227b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 12289566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12299566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1230137fb511SHong Zhang tmp = a->solve_work; 1231137fb511SHong Zhang 12329371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 12339371c9d4SSatish Balay r = rout; 12349371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 12359371c9d4SSatish Balay c = cout + (n - 1); 1236137fb511SHong Zhang 1237137fb511SHong Zhang /* forward solve the lower triangular */ 1238137fb511SHong Zhang tmp[0] = b[*r++]; 1239137fb511SHong Zhang tmps = tmp; 1240137fb511SHong Zhang for (row = 1; row < n; row++) { 1241137fb511SHong Zhang i = rout[row]; /* permuted row */ 1242137fb511SHong Zhang v = aa + ai[i]; 1243137fb511SHong Zhang vi = aj + ai[i]; 1244137fb511SHong Zhang nz = a->diag[i] - ai[i]; 1245137fb511SHong Zhang sum = b[*r++]; 1246003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1247137fb511SHong Zhang tmp[row] = sum; 1248137fb511SHong Zhang } 1249137fb511SHong Zhang 1250137fb511SHong Zhang /* backward solve the upper triangular */ 1251137fb511SHong Zhang for (row = n - 1; row >= 0; row--) { 1252137fb511SHong Zhang i = rout[row]; /* permuted row */ 1253137fb511SHong Zhang v = aa + a->diag[i] + 1; 1254137fb511SHong Zhang vi = aj + a->diag[i] + 1; 1255137fb511SHong Zhang nz = ai[i + 1] - a->diag[i] - 1; 1256137fb511SHong Zhang sum = tmp[row]; 1257003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1258137fb511SHong Zhang x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1259137fb511SHong Zhang } 1260b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 12619566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 12629566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 12639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12649566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 12659566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 12663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1267137fb511SHong Zhang } 1268137fb511SHong Zhang 1269c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1270ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1271d71ae5a4SJacob Faibussowitsch { 1272930ae53cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1273d0f46423SBarry Smith PetscInt n = A->rmap->n; 1274b377110cSBarry Smith const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1275356650c2SBarry Smith PetscScalar *x; 1276356650c2SBarry Smith const PetscScalar *b; 1277b65878eeSJunchao Zhang const MatScalar *aa; 1278aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1279356650c2SBarry Smith PetscInt adiag_i, i, nz, ai_i; 1280b377110cSBarry Smith const PetscInt *vi; 1281dd6ea824SBarry Smith const MatScalar *v; 1282dd6ea824SBarry Smith PetscScalar sum; 1283d85afc42SSatish Balay #endif 1284930ae53cSBarry Smith 12853a40ed3dSBarry Smith PetscFunctionBegin; 12863ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1287930ae53cSBarry Smith 1288b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 12899566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12909566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1291930ae53cSBarry Smith 1292aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 12931c4feecaSSatish Balay fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 12941c4feecaSSatish Balay #else 1295930ae53cSBarry Smith /* forward solve the lower triangular */ 1296930ae53cSBarry Smith x[0] = b[0]; 1297930ae53cSBarry Smith for (i = 1; i < n; i++) { 1298930ae53cSBarry Smith ai_i = ai[i]; 1299930ae53cSBarry Smith v = aa + ai_i; 1300930ae53cSBarry Smith vi = aj + ai_i; 1301930ae53cSBarry Smith nz = adiag[i] - ai_i; 1302930ae53cSBarry Smith sum = b[i]; 1303003131ecSBarry Smith PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1304930ae53cSBarry Smith x[i] = sum; 1305930ae53cSBarry Smith } 1306930ae53cSBarry Smith 1307930ae53cSBarry Smith /* backward solve the upper triangular */ 1308930ae53cSBarry Smith for (i = n - 1; i >= 0; i--) { 1309930ae53cSBarry Smith adiag_i = adiag[i]; 1310d708749eSBarry Smith v = aa + adiag_i + 1; 1311d708749eSBarry Smith vi = aj + adiag_i + 1; 1312930ae53cSBarry Smith nz = ai[i + 1] - adiag_i - 1; 1313930ae53cSBarry Smith sum = x[i]; 1314003131ecSBarry Smith PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1315930ae53cSBarry Smith x[i] = sum * aa[adiag_i]; 1316930ae53cSBarry Smith } 13171c4feecaSSatish Balay #endif 13189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1319b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 13209566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 13219566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 13223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1323930ae53cSBarry Smith } 1324930ae53cSBarry Smith 1325ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1326d71ae5a4SJacob Faibussowitsch { 1327416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1328416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 132904fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 13305d0c19d7SBarry Smith PetscInt nz; 133104fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 133204fbf559SBarry Smith PetscScalar *x, *tmp, sum; 133304fbf559SBarry Smith const PetscScalar *b; 1334b65878eeSJunchao Zhang const MatScalar *aa, *v; 1335da3a660dSBarry Smith 13363a40ed3dSBarry Smith PetscFunctionBegin; 13379566063dSJacob Faibussowitsch if (yy != xx) PetscCall(VecCopy(yy, xx)); 1338da3a660dSBarry Smith 1339b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 13409566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 13419566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1342416022c9SBarry Smith tmp = a->solve_work; 1343da3a660dSBarry Smith 13449371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 13459371c9d4SSatish Balay r = rout; 13469371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 13479371c9d4SSatish Balay c = cout + (n - 1); 1348da3a660dSBarry Smith 1349da3a660dSBarry Smith /* forward solve the lower triangular */ 1350da3a660dSBarry Smith tmp[0] = b[*r++]; 1351da3a660dSBarry Smith for (i = 1; i < n; i++) { 1352010a20caSHong Zhang v = aa + ai[i]; 1353010a20caSHong Zhang vi = aj + ai[i]; 1354416022c9SBarry Smith nz = a->diag[i] - ai[i]; 1355da3a660dSBarry Smith sum = b[*r++]; 135604fbf559SBarry Smith for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1357da3a660dSBarry Smith tmp[i] = sum; 1358da3a660dSBarry Smith } 1359da3a660dSBarry Smith 1360da3a660dSBarry Smith /* backward solve the upper triangular */ 1361da3a660dSBarry Smith for (i = n - 1; i >= 0; i--) { 1362010a20caSHong Zhang v = aa + a->diag[i] + 1; 1363010a20caSHong Zhang vi = aj + a->diag[i] + 1; 1364416022c9SBarry Smith nz = ai[i + 1] - a->diag[i] - 1; 1365da3a660dSBarry Smith sum = tmp[i]; 136604fbf559SBarry Smith for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1367010a20caSHong Zhang tmp[i] = sum * aa[a->diag[i]]; 1368da3a660dSBarry Smith x[*c--] += tmp[i]; 1369da3a660dSBarry Smith } 1370da3a660dSBarry Smith 1371b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 13729566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 13739566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 13749566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 13759566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 13769566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 13773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1378da3a660dSBarry Smith } 137904fbf559SBarry Smith 1380d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1381d71ae5a4SJacob Faibussowitsch { 13823c0119dfSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 13833c0119dfSShri Abhyankar IS iscol = a->col, isrow = a->row; 13843c0119dfSShri Abhyankar PetscInt i, n = A->rmap->n, j; 13853c0119dfSShri Abhyankar PetscInt nz; 13863c0119dfSShri Abhyankar const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 13873c0119dfSShri Abhyankar PetscScalar *x, *tmp, sum; 13883c0119dfSShri Abhyankar const PetscScalar *b; 1389b65878eeSJunchao Zhang const MatScalar *aa, *v; 13903c0119dfSShri Abhyankar 13913c0119dfSShri Abhyankar PetscFunctionBegin; 13929566063dSJacob Faibussowitsch if (yy != xx) PetscCall(VecCopy(yy, xx)); 13933c0119dfSShri Abhyankar 1394b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 13959566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 13969566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 13973c0119dfSShri Abhyankar tmp = a->solve_work; 13983c0119dfSShri Abhyankar 13999371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 14009371c9d4SSatish Balay r = rout; 14019371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 14029371c9d4SSatish Balay c = cout; 14033c0119dfSShri Abhyankar 14043c0119dfSShri Abhyankar /* forward solve the lower triangular */ 14053c0119dfSShri Abhyankar tmp[0] = b[r[0]]; 14063c0119dfSShri Abhyankar v = aa; 14073c0119dfSShri Abhyankar vi = aj; 14083c0119dfSShri Abhyankar for (i = 1; i < n; i++) { 14093c0119dfSShri Abhyankar nz = ai[i + 1] - ai[i]; 14103c0119dfSShri Abhyankar sum = b[r[i]]; 14113c0119dfSShri Abhyankar for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 14123c0119dfSShri Abhyankar tmp[i] = sum; 14132205254eSKarl Rupp v += nz; 14142205254eSKarl Rupp vi += nz; 14153c0119dfSShri Abhyankar } 14163c0119dfSShri Abhyankar 14173c0119dfSShri Abhyankar /* backward solve the upper triangular */ 14183c0119dfSShri Abhyankar v = aa + adiag[n - 1]; 14193c0119dfSShri Abhyankar vi = aj + adiag[n - 1]; 14203c0119dfSShri Abhyankar for (i = n - 1; i >= 0; i--) { 14213c0119dfSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 14223c0119dfSShri Abhyankar sum = tmp[i]; 14233c0119dfSShri Abhyankar for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 14243c0119dfSShri Abhyankar tmp[i] = sum * v[nz]; 14253c0119dfSShri Abhyankar x[c[i]] += tmp[i]; 14269371c9d4SSatish Balay v += nz + 1; 14279371c9d4SSatish Balay vi += nz + 1; 14283c0119dfSShri Abhyankar } 14293c0119dfSShri Abhyankar 1430b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 14319566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 14329566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 14339566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 14349566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 14359566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 14363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14373c0119dfSShri Abhyankar } 14383c0119dfSShri Abhyankar 1439d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1440d71ae5a4SJacob Faibussowitsch { 1441416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 14422235e667SBarry Smith IS iscol = a->col, isrow = a->row; 144304fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 144404fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 144504fbf559SBarry Smith PetscInt nz; 144604fbf559SBarry Smith PetscScalar *x, *tmp, s1; 1447b65878eeSJunchao Zhang const MatScalar *aa, *v; 144804fbf559SBarry Smith const PetscScalar *b; 1449da3a660dSBarry Smith 14503a40ed3dSBarry Smith PetscFunctionBegin; 1451b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 14529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 14539566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1454416022c9SBarry Smith tmp = a->solve_work; 1455da3a660dSBarry Smith 14569371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 14579371c9d4SSatish Balay r = rout; 14589371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 14599371c9d4SSatish Balay c = cout; 1460da3a660dSBarry Smith 1461da3a660dSBarry Smith /* copy the b into temp work space according to permutation */ 14622235e667SBarry Smith for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1463da3a660dSBarry Smith 1464da3a660dSBarry Smith /* forward solve the U^T */ 1465da3a660dSBarry Smith for (i = 0; i < n; i++) { 1466010a20caSHong Zhang v = aa + diag[i]; 1467010a20caSHong Zhang vi = aj + diag[i] + 1; 1468f1af5d2fSBarry Smith nz = ai[i + 1] - diag[i] - 1; 1469c38d4ed2SBarry Smith s1 = tmp[i]; 1470ef66eb69SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 147104fbf559SBarry Smith for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1472c38d4ed2SBarry Smith tmp[i] = s1; 1473da3a660dSBarry Smith } 1474da3a660dSBarry Smith 1475da3a660dSBarry Smith /* backward solve the L^T */ 1476da3a660dSBarry Smith for (i = n - 1; i >= 0; i--) { 1477010a20caSHong Zhang v = aa + diag[i] - 1; 1478010a20caSHong Zhang vi = aj + diag[i] - 1; 1479f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 1480f1af5d2fSBarry Smith s1 = tmp[i]; 148104fbf559SBarry Smith for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1482da3a660dSBarry Smith } 1483da3a660dSBarry Smith 1484da3a660dSBarry Smith /* copy tmp into x according to permutation */ 1485591294e4SBarry Smith for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1486da3a660dSBarry Smith 14879566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 14889566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1489b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 14909566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 14919566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 1492da3a660dSBarry Smith 14939566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 14943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1495da3a660dSBarry Smith } 1496da3a660dSBarry Smith 1497d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1498d71ae5a4SJacob Faibussowitsch { 1499d1fa9404SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1500d1fa9404SShri Abhyankar IS iscol = a->col, isrow = a->row; 1501d1fa9404SShri Abhyankar const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1502d1fa9404SShri Abhyankar PetscInt i, n = A->rmap->n, j; 1503d1fa9404SShri Abhyankar PetscInt nz; 1504d1fa9404SShri Abhyankar PetscScalar *x, *tmp, s1; 1505b65878eeSJunchao Zhang const MatScalar *aa, *v; 1506d1fa9404SShri Abhyankar const PetscScalar *b; 1507d1fa9404SShri Abhyankar 1508d1fa9404SShri Abhyankar PetscFunctionBegin; 1509b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 15109566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 15119566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1512d1fa9404SShri Abhyankar tmp = a->solve_work; 1513d1fa9404SShri Abhyankar 15149371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 15159371c9d4SSatish Balay r = rout; 15169371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 15179371c9d4SSatish Balay c = cout; 1518d1fa9404SShri Abhyankar 1519d1fa9404SShri Abhyankar /* copy the b into temp work space according to permutation */ 1520d1fa9404SShri Abhyankar for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1521d1fa9404SShri Abhyankar 1522d1fa9404SShri Abhyankar /* forward solve the U^T */ 1523d1fa9404SShri Abhyankar for (i = 0; i < n; i++) { 1524d1fa9404SShri Abhyankar v = aa + adiag[i + 1] + 1; 1525d1fa9404SShri Abhyankar vi = aj + adiag[i + 1] + 1; 1526d1fa9404SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 1527d1fa9404SShri Abhyankar s1 = tmp[i]; 1528d1fa9404SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1529d1fa9404SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1530d1fa9404SShri Abhyankar tmp[i] = s1; 1531d1fa9404SShri Abhyankar } 1532d1fa9404SShri Abhyankar 1533d1fa9404SShri Abhyankar /* backward solve the L^T */ 1534d1fa9404SShri Abhyankar for (i = n - 1; i >= 0; i--) { 1535d1fa9404SShri Abhyankar v = aa + ai[i]; 1536d1fa9404SShri Abhyankar vi = aj + ai[i]; 1537d1fa9404SShri Abhyankar nz = ai[i + 1] - ai[i]; 1538d1fa9404SShri Abhyankar s1 = tmp[i]; 1539d1fa9404SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1540d1fa9404SShri Abhyankar } 1541d1fa9404SShri Abhyankar 1542d1fa9404SShri Abhyankar /* copy tmp into x according to permutation */ 1543d1fa9404SShri Abhyankar for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1544d1fa9404SShri Abhyankar 15459566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 15469566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1547b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 15489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 15499566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 1550d1fa9404SShri Abhyankar 15519566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 15523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1553d1fa9404SShri Abhyankar } 1554d1fa9404SShri Abhyankar 1555d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1556d71ae5a4SJacob Faibussowitsch { 1557416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 15582235e667SBarry Smith IS iscol = a->col, isrow = a->row; 155904fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 156004fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 156104fbf559SBarry Smith PetscInt nz; 156204fbf559SBarry Smith PetscScalar *x, *tmp, s1; 1563b65878eeSJunchao Zhang const MatScalar *aa, *v; 156404fbf559SBarry Smith const PetscScalar *b; 15656abc6512SBarry Smith 15663a40ed3dSBarry Smith PetscFunctionBegin; 15679566063dSJacob Faibussowitsch if (zz != xx) PetscCall(VecCopy(zz, xx)); 1568b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 15699566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 15709566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1571416022c9SBarry Smith tmp = a->solve_work; 15726abc6512SBarry Smith 15739371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 15749371c9d4SSatish Balay r = rout; 15759371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 15769371c9d4SSatish Balay c = cout; 15776abc6512SBarry Smith 15786abc6512SBarry Smith /* copy the b into temp work space according to permutation */ 15792235e667SBarry Smith for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 15806abc6512SBarry Smith 15816abc6512SBarry Smith /* forward solve the U^T */ 15826abc6512SBarry Smith for (i = 0; i < n; i++) { 1583010a20caSHong Zhang v = aa + diag[i]; 1584010a20caSHong Zhang vi = aj + diag[i] + 1; 1585f1af5d2fSBarry Smith nz = ai[i + 1] - diag[i] - 1; 158604fbf559SBarry Smith s1 = tmp[i]; 158704fbf559SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 158804fbf559SBarry Smith for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 158904fbf559SBarry Smith tmp[i] = s1; 15906abc6512SBarry Smith } 15916abc6512SBarry Smith 15926abc6512SBarry Smith /* backward solve the L^T */ 15936abc6512SBarry Smith for (i = n - 1; i >= 0; i--) { 1594010a20caSHong Zhang v = aa + diag[i] - 1; 1595010a20caSHong Zhang vi = aj + diag[i] - 1; 1596f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 159704fbf559SBarry Smith s1 = tmp[i]; 159804fbf559SBarry Smith for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 15996abc6512SBarry Smith } 16006abc6512SBarry Smith 16016abc6512SBarry Smith /* copy tmp into x according to permutation */ 16026abc6512SBarry Smith for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 16036abc6512SBarry Smith 16049566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 16059566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1606b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 16079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 16089566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 16096abc6512SBarry Smith 16109566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 16113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1612da3a660dSBarry Smith } 161304fbf559SBarry Smith 1614d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1615d71ae5a4SJacob Faibussowitsch { 1616533e6011SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1617533e6011SShri Abhyankar IS iscol = a->col, isrow = a->row; 1618533e6011SShri Abhyankar const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1619533e6011SShri Abhyankar PetscInt i, n = A->rmap->n, j; 1620533e6011SShri Abhyankar PetscInt nz; 1621533e6011SShri Abhyankar PetscScalar *x, *tmp, s1; 1622b65878eeSJunchao Zhang const MatScalar *aa, *v; 1623533e6011SShri Abhyankar const PetscScalar *b; 1624533e6011SShri Abhyankar 1625533e6011SShri Abhyankar PetscFunctionBegin; 16269566063dSJacob Faibussowitsch if (zz != xx) PetscCall(VecCopy(zz, xx)); 1627b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 16289566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 16299566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1630533e6011SShri Abhyankar tmp = a->solve_work; 1631533e6011SShri Abhyankar 16329371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 16339371c9d4SSatish Balay r = rout; 16349371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 16359371c9d4SSatish Balay c = cout; 1636533e6011SShri Abhyankar 1637533e6011SShri Abhyankar /* copy the b into temp work space according to permutation */ 1638533e6011SShri Abhyankar for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1639533e6011SShri Abhyankar 1640533e6011SShri Abhyankar /* forward solve the U^T */ 1641533e6011SShri Abhyankar for (i = 0; i < n; i++) { 1642533e6011SShri Abhyankar v = aa + adiag[i + 1] + 1; 1643533e6011SShri Abhyankar vi = aj + adiag[i + 1] + 1; 1644533e6011SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 1645533e6011SShri Abhyankar s1 = tmp[i]; 1646533e6011SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1647533e6011SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1648533e6011SShri Abhyankar tmp[i] = s1; 1649533e6011SShri Abhyankar } 1650533e6011SShri Abhyankar 1651533e6011SShri Abhyankar /* backward solve the L^T */ 1652533e6011SShri Abhyankar for (i = n - 1; i >= 0; i--) { 1653533e6011SShri Abhyankar v = aa + ai[i]; 1654533e6011SShri Abhyankar vi = aj + ai[i]; 1655533e6011SShri Abhyankar nz = ai[i + 1] - ai[i]; 1656533e6011SShri Abhyankar s1 = tmp[i]; 1657c5e3b2a3SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1658533e6011SShri Abhyankar } 1659533e6011SShri Abhyankar 1660533e6011SShri Abhyankar /* copy tmp into x according to permutation */ 1661533e6011SShri Abhyankar for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1662533e6011SShri Abhyankar 16639566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 16649566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1665b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 16669566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 16679566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 1668533e6011SShri Abhyankar 16699566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 16703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1671533e6011SShri Abhyankar } 1672533e6011SShri Abhyankar 16738fc3a347SHong Zhang /* 16748a76ed4fSHong Zhang ilu() under revised new data structure. 1675813a1f2bSShri Abhyankar Factored arrays bj and ba are stored as 1676813a1f2bSShri Abhyankar L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1677813a1f2bSShri Abhyankar 1678813a1f2bSShri Abhyankar bi=fact->i is an array of size n+1, in which 1679baabb860SHong Zhang bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1680baabb860SHong Zhang bi[n]: points to L(n-1,n-1)+1 1681baabb860SHong Zhang 1682813a1f2bSShri Abhyankar bdiag=fact->diag is an array of size n+1,in which 1683baabb860SHong Zhang bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1684a24f213cSHong Zhang bdiag[n]: points to entry of U(n-1,0)-1 1685813a1f2bSShri Abhyankar 1686813a1f2bSShri Abhyankar U(i,:) contains bdiag[i] as its last entry, i.e., 1687813a1f2bSShri Abhyankar U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1688813a1f2bSShri Abhyankar */ 1689d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1690d71ae5a4SJacob Faibussowitsch { 1691813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1692bbd65245SShri Abhyankar const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1693bbd65245SShri Abhyankar PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 16941df811f5SHong Zhang IS isicol; 1695813a1f2bSShri Abhyankar 1696813a1f2bSShri Abhyankar PetscFunctionBegin; 16979566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 16989566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 169957508eceSPierre Jolivet b = (Mat_SeqAIJ *)fact->data; 1700813a1f2bSShri Abhyankar 1701813a1f2bSShri Abhyankar /* allocate matrix arrays for new data structure */ 1702*84648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a)); 1703*84648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j)); 17049f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i)); 17059f0612e4SBarry Smith if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 17069f0612e4SBarry Smith b->free_a = PETSC_TRUE; 17079f0612e4SBarry Smith b->free_ij = PETSC_TRUE; 17082205254eSKarl Rupp 1709aa624791SPierre Jolivet if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 1710813a1f2bSShri Abhyankar bdiag = b->diag; 1711813a1f2bSShri Abhyankar 1712813a1f2bSShri Abhyankar /* set bi and bj with new data structure */ 1713813a1f2bSShri Abhyankar bi = b->i; 1714813a1f2bSShri Abhyankar bj = b->j; 1715813a1f2bSShri Abhyankar 1716813a1f2bSShri Abhyankar /* L part */ 1717e60cf9a0SBarry Smith bi[0] = 0; 1718813a1f2bSShri Abhyankar for (i = 0; i < n; i++) { 1719813a1f2bSShri Abhyankar nz = adiag[i] - ai[i]; 1720813a1f2bSShri Abhyankar bi[i + 1] = bi[i] + nz; 1721813a1f2bSShri Abhyankar aj = a->j + ai[i]; 1722ad540459SPierre Jolivet for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1723813a1f2bSShri Abhyankar } 1724813a1f2bSShri Abhyankar 1725813a1f2bSShri Abhyankar /* U part */ 172662fbd6afSShri Abhyankar bdiag[n] = bi[n] - 1; 1727813a1f2bSShri Abhyankar for (i = n - 1; i >= 0; i--) { 1728813a1f2bSShri Abhyankar nz = ai[i + 1] - adiag[i] - 1; 1729813a1f2bSShri Abhyankar aj = a->j + adiag[i] + 1; 1730ad540459SPierre Jolivet for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1731813a1f2bSShri Abhyankar /* diag[i] */ 1732bbd65245SShri Abhyankar bj[k++] = i; 1733a24f213cSHong Zhang bdiag[i] = bdiag[i + 1] + nz + 1; 1734813a1f2bSShri Abhyankar } 17351df811f5SHong Zhang 1736d5f3da31SBarry Smith fact->factortype = MAT_FACTOR_ILU; 17371df811f5SHong Zhang fact->info.factor_mallocs = 0; 17381df811f5SHong Zhang fact->info.fill_ratio_given = info->fill; 17391df811f5SHong Zhang fact->info.fill_ratio_needed = 1.0; 174035233d99SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 17419566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 17421df811f5SHong Zhang 174357508eceSPierre Jolivet b = (Mat_SeqAIJ *)fact->data; 17441df811f5SHong Zhang b->row = isrow; 17451df811f5SHong Zhang b->col = iscol; 17461df811f5SHong Zhang b->icol = isicol; 1747*84648c2dSPierre Jolivet PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work)); 17489566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 17499566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 17503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1751813a1f2bSShri Abhyankar } 1752813a1f2bSShri Abhyankar 1753d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1754d71ae5a4SJacob Faibussowitsch { 1755813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1756813a1f2bSShri Abhyankar IS isicol; 1757813a1f2bSShri Abhyankar const PetscInt *r, *ic; 17581df811f5SHong Zhang PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1759813a1f2bSShri Abhyankar PetscInt *bi, *cols, nnz, *cols_lvl; 1760813a1f2bSShri Abhyankar PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1761813a1f2bSShri Abhyankar PetscInt i, levels, diagonal_fill; 1762035f4963SHong Zhang PetscBool col_identity, row_identity, missing; 1763813a1f2bSShri Abhyankar PetscReal f; 17640298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1765813a1f2bSShri Abhyankar PetscBT lnkbt; 1766813a1f2bSShri Abhyankar PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 17670298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 17680298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1769813a1f2bSShri Abhyankar 1770813a1f2bSShri Abhyankar PetscFunctionBegin; 177108401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 17729566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 177328b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1774ad04f41aSHong Zhang 1775813a1f2bSShri Abhyankar levels = (PetscInt)info->levels; 17769566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 17779566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 1778813a1f2bSShri Abhyankar if (!levels && row_identity && col_identity) { 1779813a1f2bSShri Abhyankar /* special case: ilu(0) with natural ordering */ 17809566063dSJacob Faibussowitsch PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 17814d12350bSJunchao Zhang if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 17823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1783813a1f2bSShri Abhyankar } 1784813a1f2bSShri Abhyankar 17859566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 17869566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 17879566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 1788813a1f2bSShri Abhyankar 17891bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 17909f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 17919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1792e60cf9a0SBarry Smith bi[0] = bdiag[0] = 0; 17939566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1794813a1f2bSShri Abhyankar 1795813a1f2bSShri Abhyankar /* create a linked list for storing column indices of the active row */ 1796813a1f2bSShri Abhyankar nlnk = n + 1; 17979566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1798813a1f2bSShri Abhyankar 1799813a1f2bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 18001df811f5SHong Zhang f = info->fill; 18011df811f5SHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 18029566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1803813a1f2bSShri Abhyankar current_space = free_space; 18049566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1805813a1f2bSShri Abhyankar current_space_lvl = free_space_lvl; 1806813a1f2bSShri Abhyankar for (i = 0; i < n; i++) { 1807813a1f2bSShri Abhyankar nzi = 0; 1808813a1f2bSShri Abhyankar /* copy current row into linked list */ 1809813a1f2bSShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 181028b400f6SJacob Faibussowitsch PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1811813a1f2bSShri Abhyankar cols = aj + ai[r[i]]; 1812813a1f2bSShri Abhyankar lnk[i] = -1; /* marker to indicate if diagonal exists */ 18139566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1814813a1f2bSShri Abhyankar nzi += nlnk; 1815813a1f2bSShri Abhyankar 1816813a1f2bSShri Abhyankar /* make sure diagonal entry is included */ 1817813a1f2bSShri Abhyankar if (diagonal_fill && lnk[i] == -1) { 1818813a1f2bSShri Abhyankar fm = n; 1819813a1f2bSShri Abhyankar while (lnk[fm] < i) fm = lnk[fm]; 1820813a1f2bSShri Abhyankar lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1821813a1f2bSShri Abhyankar lnk[fm] = i; 1822813a1f2bSShri Abhyankar lnk_lvl[i] = 0; 18239371c9d4SSatish Balay nzi++; 18249371c9d4SSatish Balay dcount++; 1825813a1f2bSShri Abhyankar } 1826813a1f2bSShri Abhyankar 1827813a1f2bSShri Abhyankar /* add pivot rows into the active row */ 1828813a1f2bSShri Abhyankar nzbd = 0; 1829813a1f2bSShri Abhyankar prow = lnk[n]; 1830813a1f2bSShri Abhyankar while (prow < i) { 1831813a1f2bSShri Abhyankar nnz = bdiag[prow]; 1832813a1f2bSShri Abhyankar cols = bj_ptr[prow] + nnz + 1; 1833813a1f2bSShri Abhyankar cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1834813a1f2bSShri Abhyankar nnz = bi[prow + 1] - bi[prow] - nnz - 1; 18359566063dSJacob Faibussowitsch PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1836813a1f2bSShri Abhyankar nzi += nlnk; 1837813a1f2bSShri Abhyankar prow = lnk[prow]; 1838813a1f2bSShri Abhyankar nzbd++; 1839813a1f2bSShri Abhyankar } 1840813a1f2bSShri Abhyankar bdiag[i] = nzbd; 1841813a1f2bSShri Abhyankar bi[i + 1] = bi[i] + nzi; 1842813a1f2bSShri Abhyankar /* if free space is not available, make more free space */ 1843813a1f2bSShri Abhyankar if (current_space->local_remaining < nzi) { 1844f91af8c7SBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 18459566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 18469566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1847813a1f2bSShri Abhyankar reallocs++; 1848813a1f2bSShri Abhyankar } 1849813a1f2bSShri Abhyankar 1850813a1f2bSShri Abhyankar /* copy data into free_space and free_space_lvl, then initialize lnk */ 18519566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1852813a1f2bSShri Abhyankar bj_ptr[i] = current_space->array; 1853813a1f2bSShri Abhyankar bjlvl_ptr[i] = current_space_lvl->array; 1854813a1f2bSShri Abhyankar 1855813a1f2bSShri Abhyankar /* make sure the active row i has diagonal entry */ 185600045ab3SPierre Jolivet PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1857813a1f2bSShri Abhyankar 1858813a1f2bSShri Abhyankar current_space->array += nzi; 1859813a1f2bSShri Abhyankar current_space->local_used += nzi; 1860813a1f2bSShri Abhyankar current_space->local_remaining -= nzi; 1861813a1f2bSShri Abhyankar current_space_lvl->array += nzi; 1862813a1f2bSShri Abhyankar current_space_lvl->local_used += nzi; 1863813a1f2bSShri Abhyankar current_space_lvl->local_remaining -= nzi; 1864813a1f2bSShri Abhyankar } 1865813a1f2bSShri Abhyankar 18669566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 18679566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 1868813a1f2bSShri Abhyankar /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1869*84648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj)); 18709566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1871813a1f2bSShri Abhyankar 18729566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 18739566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 18749566063dSJacob Faibussowitsch PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1875813a1f2bSShri Abhyankar 1876813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO) 1877813a1f2bSShri Abhyankar { 1878aef85c9fSShri Abhyankar PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 18799566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 18809566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 18819566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 18829566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 188348a46eb9SPierre Jolivet if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1884813a1f2bSShri Abhyankar } 1885813a1f2bSShri Abhyankar #endif 1886813a1f2bSShri Abhyankar /* put together the new matrix */ 18879566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 188857508eceSPierre Jolivet b = (Mat_SeqAIJ *)fact->data; 1889813a1f2bSShri Abhyankar b->free_ij = PETSC_TRUE; 18909f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 18919f0612e4SBarry Smith b->free_a = PETSC_TRUE; 1892813a1f2bSShri Abhyankar b->j = bj; 1893813a1f2bSShri Abhyankar b->i = bi; 1894813a1f2bSShri Abhyankar b->diag = bdiag; 1895f4259b30SLisandro Dalcin b->ilen = NULL; 1896f4259b30SLisandro Dalcin b->imax = NULL; 1897813a1f2bSShri Abhyankar b->row = isrow; 1898813a1f2bSShri Abhyankar b->col = iscol; 18999566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 19009566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 1901813a1f2bSShri Abhyankar b->icol = isicol; 19022205254eSKarl Rupp 1903*84648c2dSPierre Jolivet PetscCall(PetscMalloc1(n, &b->solve_work)); 1904813a1f2bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. 1905813a1f2bSShri Abhyankar Allocate bdiag, solve_work, new a, new j */ 1906f268cba8SShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 19072205254eSKarl Rupp 190857508eceSPierre Jolivet fact->info.factor_mallocs = reallocs; 190957508eceSPierre Jolivet fact->info.fill_ratio_given = f; 191057508eceSPierre Jolivet fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 191157508eceSPierre Jolivet fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 19124d12350bSJunchao Zhang if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 19139566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 19143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1915813a1f2bSShri Abhyankar } 1916813a1f2bSShri Abhyankar 1917ff6a9541SJacob Faibussowitsch #if 0 1918ff6a9541SJacob Faibussowitsch // unused 1919ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1920d71ae5a4SJacob Faibussowitsch { 19216516239fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 19226516239fSHong Zhang IS isicol; 19236516239fSHong Zhang const PetscInt *r, *ic; 1924ece542f9SHong Zhang PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 19256516239fSHong Zhang PetscInt *bi, *cols, nnz, *cols_lvl; 19266516239fSHong Zhang PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 19276516239fSHong Zhang PetscInt i, levels, diagonal_fill; 1928ace3abfcSBarry Smith PetscBool col_identity, row_identity; 19296516239fSHong Zhang PetscReal f; 19300298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL; 19316516239fSHong Zhang PetscBT lnkbt; 19326516239fSHong Zhang PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 19330298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 19340298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1935ace3abfcSBarry Smith PetscBool missing; 19366516239fSHong Zhang 19376516239fSHong Zhang PetscFunctionBegin; 193808401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 19399566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 194028b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1941ece542f9SHong Zhang 19426516239fSHong Zhang f = info->fill; 19436516239fSHong Zhang levels = (PetscInt)info->levels; 19446516239fSHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 19452205254eSKarl Rupp 19469566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 19476516239fSHong Zhang 19489566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 19499566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 19506516239fSHong Zhang if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 19519566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 19522205254eSKarl Rupp 1953ad04f41aSHong Zhang (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 19544d12350bSJunchao Zhang if (a->inode.size_csr) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1955d5f3da31SBarry Smith fact->factortype = MAT_FACTOR_ILU; 1956719d5645SBarry Smith (fact)->info.factor_mallocs = 0; 1957719d5645SBarry Smith (fact)->info.fill_ratio_given = info->fill; 1958719d5645SBarry Smith (fact)->info.fill_ratio_needed = 1.0; 19592205254eSKarl Rupp 196057508eceSPierre Jolivet b = (Mat_SeqAIJ *)fact->data; 196108480c60SBarry Smith b->row = isrow; 196208480c60SBarry Smith b->col = iscol; 196382bf6240SBarry Smith b->icol = isicol; 19649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 19659566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 19669566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 19673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 196808480c60SBarry Smith } 196908480c60SBarry Smith 19709566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 19719566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 19729e25ed09SBarry Smith 19731bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 19749f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi)); 19759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1976a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 19776cf997b0SBarry Smith 19789566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 19795a8e39fbSHong Zhang 19805a8e39fbSHong Zhang /* create a linked list for storing column indices of the active row */ 19815a8e39fbSHong Zhang nlnk = n + 1; 19829566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 19835a8e39fbSHong Zhang 19845a8e39fbSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 19859566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 19865a8e39fbSHong Zhang current_space = free_space; 19879566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 19885a8e39fbSHong Zhang current_space_lvl = free_space_lvl; 19895a8e39fbSHong Zhang 19905a8e39fbSHong Zhang for (i = 0; i < n; i++) { 19915a8e39fbSHong Zhang nzi = 0; 19926cf997b0SBarry Smith /* copy current row into linked list */ 19935a8e39fbSHong Zhang nnz = ai[r[i] + 1] - ai[r[i]]; 199428b400f6SJacob Faibussowitsch PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 19955a8e39fbSHong Zhang cols = aj + ai[r[i]]; 19965a8e39fbSHong Zhang lnk[i] = -1; /* marker to indicate if diagonal exists */ 19979566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 19985a8e39fbSHong Zhang nzi += nlnk; 19996cf997b0SBarry Smith 20006cf997b0SBarry Smith /* make sure diagonal entry is included */ 20015a8e39fbSHong Zhang if (diagonal_fill && lnk[i] == -1) { 20026cf997b0SBarry Smith fm = n; 20035a8e39fbSHong Zhang while (lnk[fm] < i) fm = lnk[fm]; 20045a8e39fbSHong Zhang lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 20055a8e39fbSHong Zhang lnk[fm] = i; 20065a8e39fbSHong Zhang lnk_lvl[i] = 0; 20079371c9d4SSatish Balay nzi++; 20089371c9d4SSatish Balay dcount++; 20096cf997b0SBarry Smith } 20106cf997b0SBarry Smith 20115a8e39fbSHong Zhang /* add pivot rows into the active row */ 20125a8e39fbSHong Zhang nzbd = 0; 20135a8e39fbSHong Zhang prow = lnk[n]; 20145a8e39fbSHong Zhang while (prow < i) { 20155a8e39fbSHong Zhang nnz = bdiag[prow]; 20165a8e39fbSHong Zhang cols = bj_ptr[prow] + nnz + 1; 20175a8e39fbSHong Zhang cols_lvl = bjlvl_ptr[prow] + nnz + 1; 20185a8e39fbSHong Zhang nnz = bi[prow + 1] - bi[prow] - nnz - 1; 20199566063dSJacob Faibussowitsch PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 20205a8e39fbSHong Zhang nzi += nlnk; 20215a8e39fbSHong Zhang prow = lnk[prow]; 20225a8e39fbSHong Zhang nzbd++; 202356beaf04SBarry Smith } 20245a8e39fbSHong Zhang bdiag[i] = nzbd; 20255a8e39fbSHong Zhang bi[i + 1] = bi[i] + nzi; 2026ecf371e4SBarry Smith 20275a8e39fbSHong Zhang /* if free space is not available, make more free space */ 20285a8e39fbSHong Zhang if (current_space->local_remaining < nzi) { 2029f91af8c7SBarry Smith nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 20309566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 20319566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 20325a8e39fbSHong Zhang reallocs++; 20335a8e39fbSHong Zhang } 2034ecf371e4SBarry Smith 20355a8e39fbSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 20369566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 20375a8e39fbSHong Zhang bj_ptr[i] = current_space->array; 20385a8e39fbSHong Zhang bjlvl_ptr[i] = current_space_lvl->array; 20395a8e39fbSHong Zhang 20405a8e39fbSHong Zhang /* make sure the active row i has diagonal entry */ 204100045ab3SPierre Jolivet PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 20425a8e39fbSHong Zhang 20435a8e39fbSHong Zhang current_space->array += nzi; 20445a8e39fbSHong Zhang current_space->local_used += nzi; 20455a8e39fbSHong Zhang current_space->local_remaining -= nzi; 20465a8e39fbSHong Zhang current_space_lvl->array += nzi; 20475a8e39fbSHong Zhang current_space_lvl->local_used += nzi; 20485a8e39fbSHong Zhang current_space_lvl->local_remaining -= nzi; 20499e25ed09SBarry Smith } 20505a8e39fbSHong Zhang 20519566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 20529566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 20535a8e39fbSHong Zhang 20545a8e39fbSHong Zhang /* destroy list of free space and other temporary arrays */ 20559f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj)); 20569566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 20579566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 20589566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 20599566063dSJacob Faibussowitsch PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 20609e25ed09SBarry Smith 20616cf91177SBarry Smith #if defined(PETSC_USE_INFO) 2062f64065bbSBarry Smith { 20635a8e39fbSHong Zhang PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 20649566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 20659566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 20669566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 20679566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 206848a46eb9SPierre Jolivet if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 2069f64065bbSBarry Smith } 207063ba0a88SBarry Smith #endif 2071bbb6d6a8SBarry Smith 20729e25ed09SBarry Smith /* put together the new matrix */ 20739566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 207457508eceSPierre Jolivet b = (Mat_SeqAIJ *)fact->data; 2075e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 20769f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a)); 20779f0612e4SBarry Smith b->free_a = PETSC_TRUE; 20785a8e39fbSHong Zhang b->j = bj; 20795a8e39fbSHong Zhang b->i = bi; 20805a8e39fbSHong Zhang for (i = 0; i < n; i++) bdiag[i] += bi[i]; 20815a8e39fbSHong Zhang b->diag = bdiag; 2082f4259b30SLisandro Dalcin b->ilen = NULL; 2083f4259b30SLisandro Dalcin b->imax = NULL; 2084416022c9SBarry Smith b->row = isrow; 2085416022c9SBarry Smith b->col = iscol; 20869566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 20879566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 208882bf6240SBarry Smith b->icol = isicol; 20899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 2090416022c9SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 20915a8e39fbSHong Zhang Allocate bdiag, solve_work, new a, new j */ 20925a8e39fbSHong Zhang b->maxnz = b->nz = bi[n]; 20932205254eSKarl Rupp 209457508eceSPierre Jolivet fact->info.factor_mallocs = reallocs; 209557508eceSPierre Jolivet fact->info.fill_ratio_given = f; 209657508eceSPierre Jolivet fact->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 209757508eceSPierre Jolivet fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 20984d12350bSJunchao Zhang if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 20993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 21009e25ed09SBarry Smith } 2101ff6a9541SJacob Faibussowitsch #endif 2102d5d45c9bSBarry Smith 2103d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 2104d71ae5a4SJacob Faibussowitsch { 21055f44c854SHong Zhang Mat C = B; 21065f44c854SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 21075f44c854SHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 21085f44c854SHong Zhang IS ip = b->row, iip = b->icol; 21095f44c854SHong Zhang const PetscInt *rip, *riip; 21105f44c854SHong Zhang PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 21115f44c854SHong Zhang PetscInt *ai = a->i, *aj = a->j; 21125f44c854SHong Zhang PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 2113b65878eeSJunchao Zhang MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi; 2114ace3abfcSBarry Smith PetscBool perm_identity; 2115d90ac83dSHong Zhang FactorShiftCtx sctx; 211698a93161SHong Zhang PetscReal rs; 2117b65878eeSJunchao Zhang const MatScalar *aa, *v; 2118b65878eeSJunchao Zhang MatScalar d; 211998a93161SHong Zhang 21205f44c854SHong Zhang PetscFunctionBegin; 2121b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 212298a93161SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 21239566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 212498a93161SHong Zhang 2125f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 212698a93161SHong Zhang sctx.shift_top = info->zeropivot; 212798a93161SHong Zhang for (i = 0; i < mbs; i++) { 212898a93161SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 212998a93161SHong Zhang d = (aa)[a->diag[i]]; 213098a93161SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 213198a93161SHong Zhang v = aa + ai[i]; 213298a93161SHong Zhang nz = ai[i + 1] - ai[i]; 21332205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 213498a93161SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 213598a93161SHong Zhang } 213698a93161SHong Zhang sctx.shift_top *= 1.1; 213798a93161SHong Zhang sctx.nshift_max = 5; 213898a93161SHong Zhang sctx.shift_lo = 0.; 213998a93161SHong Zhang sctx.shift_hi = 1.; 214098a93161SHong Zhang } 21415f44c854SHong Zhang 21429566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 21439566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iip, &riip)); 21445f44c854SHong Zhang 21455f44c854SHong Zhang /* allocate working arrays 21465f44c854SHong Zhang c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 21475f44c854SHong Zhang il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 21485f44c854SHong Zhang */ 21499566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 21505f44c854SHong Zhang 215198a93161SHong Zhang do { 215207b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 215398a93161SHong Zhang 2154c5546cabSHong Zhang for (i = 0; i < mbs; i++) c2r[i] = mbs; 21552e987568SSatish Balay if (mbs) il[0] = 0; 21565f44c854SHong Zhang 21575f44c854SHong Zhang for (k = 0; k < mbs; k++) { 21585f44c854SHong Zhang /* zero rtmp */ 21595f44c854SHong Zhang nz = bi[k + 1] - bi[k]; 21605f44c854SHong Zhang bjtmp = bj + bi[k]; 21615f44c854SHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 21625f44c854SHong Zhang 21635f44c854SHong Zhang /* load in initial unfactored row */ 21645f44c854SHong Zhang bval = ba + bi[k]; 21659371c9d4SSatish Balay jmin = ai[rip[k]]; 21669371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 21675f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 21685f44c854SHong Zhang col = riip[aj[j]]; 21695f44c854SHong Zhang if (col >= k) { /* only take upper triangular entry */ 21705f44c854SHong Zhang rtmp[col] = aa[j]; 21715f44c854SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 21725f44c854SHong Zhang } 21735f44c854SHong Zhang } 217498a93161SHong Zhang /* shift the diagonal of the matrix: ZeropivotApply() */ 217598a93161SHong Zhang rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 21765f44c854SHong Zhang 21775f44c854SHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 21785f44c854SHong Zhang dk = rtmp[k]; 21795f44c854SHong Zhang i = c2r[k]; /* first row to be added to k_th row */ 21805f44c854SHong Zhang 21815f44c854SHong Zhang while (i < k) { 21825f44c854SHong Zhang nexti = c2r[i]; /* next row to be added to k_th row */ 21835f44c854SHong Zhang 21845f44c854SHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 21855f44c854SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 21865f44c854SHong Zhang uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 21875f44c854SHong Zhang dk += uikdi * ba[ili]; /* update diag[k] */ 21885f44c854SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 21895f44c854SHong Zhang 21905f44c854SHong Zhang /* add multiple of row i to k-th row */ 21919371c9d4SSatish Balay jmin = ili + 1; 21929371c9d4SSatish Balay jmax = bi[i + 1]; 21935f44c854SHong Zhang if (jmin < jmax) { 21945f44c854SHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 21955f44c854SHong Zhang /* update il and c2r for row i */ 21965f44c854SHong Zhang il[i] = jmin; 21979371c9d4SSatish Balay j = bj[jmin]; 21989371c9d4SSatish Balay c2r[i] = c2r[j]; 21999371c9d4SSatish Balay c2r[j] = i; 22005f44c854SHong Zhang } 22015f44c854SHong Zhang i = nexti; 22025f44c854SHong Zhang } 22035f44c854SHong Zhang 22045f44c854SHong Zhang /* copy data into U(k,:) */ 220598a93161SHong Zhang rs = 0.0; 22069371c9d4SSatish Balay jmin = bi[k]; 22079371c9d4SSatish Balay jmax = bi[k + 1] - 1; 22085f44c854SHong Zhang if (jmin < jmax) { 22095f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 22109371c9d4SSatish Balay col = bj[j]; 22119371c9d4SSatish Balay ba[j] = rtmp[col]; 22129371c9d4SSatish Balay rs += PetscAbsScalar(ba[j]); 22135f44c854SHong Zhang } 22145f44c854SHong Zhang /* add the k-th row into il and c2r */ 22155f44c854SHong Zhang il[k] = jmin; 22169371c9d4SSatish Balay i = bj[jmin]; 22179371c9d4SSatish Balay c2r[k] = c2r[i]; 22189371c9d4SSatish Balay c2r[i] = k; 22195f44c854SHong Zhang } 222098a93161SHong Zhang 222198a93161SHong Zhang /* MatPivotCheck() */ 222298a93161SHong Zhang sctx.rs = rs; 222398a93161SHong Zhang sctx.pv = dk; 22249566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 222507b50cabSHong Zhang if (sctx.newshift) break; 222698a93161SHong Zhang dk = sctx.pv; 222798a93161SHong Zhang 222898a93161SHong Zhang ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 222998a93161SHong Zhang } 223007b50cabSHong Zhang } while (sctx.newshift); 22315f44c854SHong Zhang 2232b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 22339566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, c2r)); 22349566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 22359566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iip, &riip)); 22365f44c854SHong Zhang 22379566063dSJacob Faibussowitsch PetscCall(ISIdentity(ip, &perm_identity)); 22385f44c854SHong Zhang if (perm_identity) { 223935233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 224035233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 22412169352eSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 22422169352eSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 22435f44c854SHong Zhang } else { 224435233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1; 224535233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 224635233d99SShri Abhyankar B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 224735233d99SShri Abhyankar B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 22485f44c854SHong Zhang } 22495f44c854SHong Zhang 22505f44c854SHong Zhang C->assembled = PETSC_TRUE; 22515f44c854SHong Zhang C->preallocated = PETSC_TRUE; 22522205254eSKarl Rupp 22539566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->n)); 225498a93161SHong Zhang 225598a93161SHong Zhang /* MatPivotView() */ 225698a93161SHong Zhang if (sctx.nshift) { 2257f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 22589566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2259f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 22609566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2261f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 22629566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 226398a93161SHong Zhang } 226498a93161SHong Zhang } 22653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 22665f44c854SHong Zhang } 22675f44c854SHong Zhang 2268d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 2269d71ae5a4SJacob Faibussowitsch { 2270719d5645SBarry Smith Mat C = B; 22710968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 22722ed133dbSHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 22739bfd6278SHong Zhang IS ip = b->row, iip = b->icol; 22745d0c19d7SBarry Smith const PetscInt *rip, *riip; 2275c5546cabSHong Zhang PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 22762ed133dbSHong Zhang PetscInt *ai = a->i, *aj = a->j; 22772ed133dbSHong Zhang PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2278b65878eeSJunchao Zhang MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi; 2279b65878eeSJunchao Zhang const MatScalar *aa, *v; 2280ace3abfcSBarry Smith PetscBool perm_identity; 22810e95ead3SHong Zhang FactorShiftCtx sctx; 22820e95ead3SHong Zhang PetscReal rs; 2283b65878eeSJunchao Zhang MatScalar d; 2284d5d45c9bSBarry Smith 2285a6175056SHong Zhang PetscFunctionBegin; 2286b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 22870e95ead3SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 22889566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 22890e95ead3SHong Zhang 22900e95ead3SHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 22910e95ead3SHong Zhang sctx.shift_top = info->zeropivot; 22920e95ead3SHong Zhang for (i = 0; i < mbs; i++) { 22930e95ead3SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 22940e95ead3SHong Zhang d = (aa)[a->diag[i]]; 22950e95ead3SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 22960e95ead3SHong Zhang v = aa + ai[i]; 22970e95ead3SHong Zhang nz = ai[i + 1] - ai[i]; 22982205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 22990e95ead3SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 23000e95ead3SHong Zhang } 23010e95ead3SHong Zhang sctx.shift_top *= 1.1; 23020e95ead3SHong Zhang sctx.nshift_max = 5; 23030e95ead3SHong Zhang sctx.shift_lo = 0.; 23040e95ead3SHong Zhang sctx.shift_hi = 1.; 23050e95ead3SHong Zhang } 2306ee45ca4aSHong Zhang 23079566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 23089566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iip, &riip)); 23092ed133dbSHong Zhang 23102ed133dbSHong Zhang /* initialization */ 23119566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 23120e95ead3SHong Zhang 23132ed133dbSHong Zhang do { 231407b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 23150e95ead3SHong Zhang 2316c5546cabSHong Zhang for (i = 0; i < mbs; i++) jl[i] = mbs; 2317c5546cabSHong Zhang il[0] = 0; 23182ed133dbSHong Zhang 23192ed133dbSHong Zhang for (k = 0; k < mbs; k++) { 2320c5546cabSHong Zhang /* zero rtmp */ 2321c5546cabSHong Zhang nz = bi[k + 1] - bi[k]; 2322c5546cabSHong Zhang bjtmp = bj + bi[k]; 2323c5546cabSHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2324c5546cabSHong Zhang 2325c05c3958SHong Zhang bval = ba + bi[k]; 23262ed133dbSHong Zhang /* initialize k-th row by the perm[k]-th row of A */ 23279371c9d4SSatish Balay jmin = ai[rip[k]]; 23289371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 23292ed133dbSHong Zhang for (j = jmin; j < jmax; j++) { 23309bfd6278SHong Zhang col = riip[aj[j]]; 23312ed133dbSHong Zhang if (col >= k) { /* only take upper triangular entry */ 23322ed133dbSHong Zhang rtmp[col] = aa[j]; 2333c05c3958SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 23342ed133dbSHong Zhang } 23352ed133dbSHong Zhang } 233639e3d630SHong Zhang /* shift the diagonal of the matrix */ 2337540703acSHong Zhang if (sctx.nshift) rtmp[k] += sctx.shift_amount; 23382ed133dbSHong Zhang 23392ed133dbSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 23402ed133dbSHong Zhang dk = rtmp[k]; 23412ed133dbSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 23422ed133dbSHong Zhang 23432ed133dbSHong Zhang while (i < k) { 23442ed133dbSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 23452ed133dbSHong Zhang 23462ed133dbSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 23472ed133dbSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 23482ed133dbSHong Zhang uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 23492ed133dbSHong Zhang dk += uikdi * ba[ili]; 23502ed133dbSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 23512ed133dbSHong Zhang 23522ed133dbSHong Zhang /* add multiple of row i to k-th row */ 23539371c9d4SSatish Balay jmin = ili + 1; 23549371c9d4SSatish Balay jmax = bi[i + 1]; 23552ed133dbSHong Zhang if (jmin < jmax) { 23562ed133dbSHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 23572ed133dbSHong Zhang /* update il and jl for row i */ 23582ed133dbSHong Zhang il[i] = jmin; 23599371c9d4SSatish Balay j = bj[jmin]; 23609371c9d4SSatish Balay jl[i] = jl[j]; 23619371c9d4SSatish Balay jl[j] = i; 23622ed133dbSHong Zhang } 23632ed133dbSHong Zhang i = nexti; 23642ed133dbSHong Zhang } 23652ed133dbSHong Zhang 23660697b6caSHong Zhang /* shift the diagonals when zero pivot is detected */ 23670697b6caSHong Zhang /* compute rs=sum of abs(off-diagonal) */ 23680697b6caSHong Zhang rs = 0.0; 23693c9e8598SHong Zhang jmin = bi[k] + 1; 23703c9e8598SHong Zhang nz = bi[k + 1] - jmin; 23713c9e8598SHong Zhang bcol = bj + jmin; 2372ad540459SPierre Jolivet for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 2373540703acSHong Zhang 2374540703acSHong Zhang sctx.rs = rs; 2375540703acSHong Zhang sctx.pv = dk; 23769566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 237707b50cabSHong Zhang if (sctx.newshift) break; 23780e95ead3SHong Zhang dk = sctx.pv; 23793c9e8598SHong Zhang 23803c9e8598SHong Zhang /* copy data into U(k,:) */ 238139e3d630SHong Zhang ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 23829371c9d4SSatish Balay jmin = bi[k] + 1; 23839371c9d4SSatish Balay jmax = bi[k + 1]; 238439e3d630SHong Zhang if (jmin < jmax) { 238539e3d630SHong Zhang for (j = jmin; j < jmax; j++) { 23869371c9d4SSatish Balay col = bj[j]; 23879371c9d4SSatish Balay ba[j] = rtmp[col]; 23883c9e8598SHong Zhang } 238939e3d630SHong Zhang /* add the k-th row into il and jl */ 23903c9e8598SHong Zhang il[k] = jmin; 23919371c9d4SSatish Balay i = bj[jmin]; 23929371c9d4SSatish Balay jl[k] = jl[i]; 23939371c9d4SSatish Balay jl[i] = k; 23943c9e8598SHong Zhang } 23953c9e8598SHong Zhang } 239607b50cabSHong Zhang } while (sctx.newshift); 23970e95ead3SHong Zhang 23989566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, jl)); 23999566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 24009566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iip, &riip)); 2401b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 2402db4efbfdSBarry Smith 24039566063dSJacob Faibussowitsch PetscCall(ISIdentity(ip, &perm_identity)); 2404db4efbfdSBarry Smith if (perm_identity) { 24050a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 24060a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 24070a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 24080a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2409db4efbfdSBarry Smith } else { 24100a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 24110a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 24120a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 24130a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2414db4efbfdSBarry Smith } 2415db4efbfdSBarry Smith 24163c9e8598SHong Zhang C->assembled = PETSC_TRUE; 24173c9e8598SHong Zhang C->preallocated = PETSC_TRUE; 24182205254eSKarl Rupp 24199566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->n)); 2420540703acSHong Zhang if (sctx.nshift) { 2421f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 24229566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2423f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 24249566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 24250697b6caSHong Zhang } 24263c9e8598SHong Zhang } 24273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24283c9e8598SHong Zhang } 2429a6175056SHong Zhang 24308a76ed4fSHong Zhang /* 24318a76ed4fSHong Zhang icc() under revised new data structure. 24328a76ed4fSHong Zhang Factored arrays bj and ba are stored as 24338a76ed4fSHong Zhang U(0,:),...,U(i,:),U(n-1,:) 24348a76ed4fSHong Zhang 24358a76ed4fSHong Zhang ui=fact->i is an array of size n+1, in which 24368a76ed4fSHong Zhang ui+ 24378a76ed4fSHong Zhang ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 24388a76ed4fSHong Zhang ui[n]: points to U(n-1,n-1)+1 24398a76ed4fSHong Zhang 24408a76ed4fSHong Zhang udiag=fact->diag is an array of size n,in which 24418a76ed4fSHong Zhang udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 24428a76ed4fSHong Zhang 24438a76ed4fSHong Zhang U(i,:) contains udiag[i] as its last entry, i.e., 24448a76ed4fSHong Zhang U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 24458a76ed4fSHong Zhang */ 24468a76ed4fSHong Zhang 2447d71ae5a4SJacob Faibussowitsch PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2448d71ae5a4SJacob Faibussowitsch { 24490968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2450ed59401aSHong Zhang Mat_SeqSBAIJ *b; 2451ace3abfcSBarry Smith PetscBool perm_identity, missing; 24525f44c854SHong Zhang PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 24535d0c19d7SBarry Smith const PetscInt *rip, *riip; 2454ed59401aSHong Zhang PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 24550298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 24565a8e39fbSHong Zhang PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2457ed59401aSHong Zhang PetscReal fill = info->fill, levels = info->levels; 24580298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 24590298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 24607a48dd6fSHong Zhang PetscBT lnkbt; 2461b635d36bSHong Zhang IS iperm; 2462a6175056SHong Zhang 2463a6175056SHong Zhang PetscFunctionBegin; 246408401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 24659566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &d)); 246628b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 24679566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 24689566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 24697a48dd6fSHong Zhang 24709f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 24719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 247239e3d630SHong Zhang ui[0] = 0; 247339e3d630SHong Zhang 24748a76ed4fSHong Zhang /* ICC(0) without matrix ordering: simply rearrange column indices */ 2475622e2028SHong Zhang if (!levels && perm_identity) { 24765f44c854SHong Zhang for (i = 0; i < am; i++) { 2477c5546cabSHong Zhang ncols = ai[i + 1] - a->diag[i]; 2478c5546cabSHong Zhang ui[i + 1] = ui[i] + ncols; 2479c5546cabSHong Zhang udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2480dc1e2a3fSHong Zhang } 24819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2482dc1e2a3fSHong Zhang cols = uj; 2483dc1e2a3fSHong Zhang for (i = 0; i < am; i++) { 24845f44c854SHong Zhang aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2485dc1e2a3fSHong Zhang ncols = ai[i + 1] - a->diag[i] - 1; 24865f44c854SHong Zhang for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2487910cf402Sprj- *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 24885f44c854SHong Zhang } 24895f44c854SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 24909566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 24919566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 24925f44c854SHong Zhang 24935f44c854SHong Zhang /* initialization */ 24949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ajtmp)); 24955f44c854SHong Zhang 24965f44c854SHong Zhang /* jl: linked list for storing indices of the pivot rows 24975f44c854SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 24989566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 24995f44c854SHong Zhang for (i = 0; i < am; i++) { 25009371c9d4SSatish Balay jl[i] = am; 25019371c9d4SSatish Balay il[i] = 0; 25025f44c854SHong Zhang } 25035f44c854SHong Zhang 25045f44c854SHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 25055f44c854SHong Zhang nlnk = am + 1; 25069566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 25075f44c854SHong Zhang 250895b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 25099566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 25105f44c854SHong Zhang current_space = free_space; 25119566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 25125f44c854SHong Zhang current_space_lvl = free_space_lvl; 25135f44c854SHong Zhang 25145f44c854SHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 25155f44c854SHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 25165f44c854SHong Zhang nzk = 0; 25175f44c854SHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 251828b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 25195f44c854SHong Zhang ncols_upper = 0; 25205f44c854SHong Zhang for (j = 0; j < ncols; j++) { 25215f44c854SHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 25225f44c854SHong Zhang if (riip[i] >= k) { /* only take upper triangular entry */ 25235f44c854SHong Zhang ajtmp[ncols_upper] = i; 25245f44c854SHong Zhang ncols_upper++; 25255f44c854SHong Zhang } 25265f44c854SHong Zhang } 25279566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 25285f44c854SHong Zhang nzk += nlnk; 25295f44c854SHong Zhang 25305f44c854SHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 25315f44c854SHong Zhang prow = jl[k]; /* 1st pivot row */ 25325f44c854SHong Zhang 25335f44c854SHong Zhang while (prow < k) { 25345f44c854SHong Zhang nextprow = jl[prow]; 25355f44c854SHong Zhang 25365f44c854SHong Zhang /* merge prow into k-th row */ 25375f44c854SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 25385f44c854SHong Zhang jmax = ui[prow + 1]; 25395f44c854SHong Zhang ncols = jmax - jmin; 25405f44c854SHong Zhang i = jmin - ui[prow]; 25415f44c854SHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 25425f44c854SHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 25435f44c854SHong Zhang j = *(uj - 1); 25449566063dSJacob Faibussowitsch PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 25455f44c854SHong Zhang nzk += nlnk; 25465f44c854SHong Zhang 25475f44c854SHong Zhang /* update il and jl for prow */ 25485f44c854SHong Zhang if (jmin < jmax) { 25495f44c854SHong Zhang il[prow] = jmin; 25509371c9d4SSatish Balay j = *cols; 25519371c9d4SSatish Balay jl[prow] = jl[j]; 25529371c9d4SSatish Balay jl[j] = prow; 25535f44c854SHong Zhang } 25545f44c854SHong Zhang prow = nextprow; 25555f44c854SHong Zhang } 25565f44c854SHong Zhang 25575f44c854SHong Zhang /* if free space is not available, make more free space */ 25585f44c854SHong Zhang if (current_space->local_remaining < nzk) { 25595f44c854SHong Zhang i = am - k + 1; /* num of unfactored rows */ 2560f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 25619566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 25629566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 25635f44c854SHong Zhang reallocs++; 25645f44c854SHong Zhang } 25655f44c854SHong Zhang 25665f44c854SHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 256708401ef6SPierre Jolivet PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 25689566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 25695f44c854SHong Zhang 25705f44c854SHong Zhang /* add the k-th row into il and jl */ 25715f44c854SHong Zhang if (nzk > 1) { 25725f44c854SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 25739371c9d4SSatish Balay jl[k] = jl[i]; 25749371c9d4SSatish Balay jl[i] = k; 25755f44c854SHong Zhang il[k] = ui[k] + 1; 25765f44c854SHong Zhang } 25775f44c854SHong Zhang uj_ptr[k] = current_space->array; 25785f44c854SHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 25795f44c854SHong Zhang 25805f44c854SHong Zhang current_space->array += nzk; 25815f44c854SHong Zhang current_space->local_used += nzk; 25825f44c854SHong Zhang current_space->local_remaining -= nzk; 25835f44c854SHong Zhang 25845f44c854SHong Zhang current_space_lvl->array += nzk; 25855f44c854SHong Zhang current_space_lvl->local_used += nzk; 25865f44c854SHong Zhang current_space_lvl->local_remaining -= nzk; 25875f44c854SHong Zhang 25885f44c854SHong Zhang ui[k + 1] = ui[k] + nzk; 25895f44c854SHong Zhang } 25905f44c854SHong Zhang 25919566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 25929566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 25939566063dSJacob Faibussowitsch PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 25949566063dSJacob Faibussowitsch PetscCall(PetscFree(ajtmp)); 25955f44c854SHong Zhang 25969263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 25979f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj)); 25989566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 25999566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 26009566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 26015f44c854SHong Zhang 26025f44c854SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 26035f44c854SHong Zhang 26045f44c854SHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 260557508eceSPierre Jolivet b = (Mat_SeqSBAIJ *)fact->data; 26069f0612e4SBarry Smith b->free_ij = PETSC_TRUE; 2607*84648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a)); 26089f0612e4SBarry Smith b->free_a = PETSC_TRUE; 26095f44c854SHong Zhang b->j = uj; 26105f44c854SHong Zhang b->i = ui; 26115f44c854SHong Zhang b->diag = udiag; 26127f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 2613f4259b30SLisandro Dalcin b->ilen = NULL; 2614f4259b30SLisandro Dalcin b->imax = NULL; 26155f44c854SHong Zhang b->row = perm; 26165f44c854SHong Zhang b->col = perm; 26179566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 26189566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 26195f44c854SHong Zhang b->icol = iperm; 26205f44c854SHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 26212205254eSKarl Rupp 2622*84648c2dSPierre Jolivet PetscCall(PetscMalloc1(am, &b->solve_work)); 26232205254eSKarl Rupp 26245f44c854SHong Zhang b->maxnz = b->nz = ui[am]; 26255f44c854SHong Zhang 2626f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2627f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 26285f44c854SHong Zhang if (ai[am] != 0) { 26296a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 263095b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 26315f44c854SHong Zhang } else { 2632f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 26335f44c854SHong Zhang } 26349263d837SHong Zhang #if defined(PETSC_USE_INFO) 26359263d837SHong Zhang if (ai[am] != 0) { 26369263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 26379566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 26389566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 26399566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 26409263d837SHong Zhang } else { 26419566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 26429263d837SHong Zhang } 26439263d837SHong Zhang #endif 264435233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 26453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 26465f44c854SHong Zhang } 26475f44c854SHong Zhang 2648ff6a9541SJacob Faibussowitsch #if 0 2649ff6a9541SJacob Faibussowitsch // unused 2650ff6a9541SJacob Faibussowitsch static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2651d71ae5a4SJacob Faibussowitsch { 26525f44c854SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 26535f44c854SHong Zhang Mat_SeqSBAIJ *b; 2654ace3abfcSBarry Smith PetscBool perm_identity, missing; 26555f44c854SHong Zhang PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 26565f44c854SHong Zhang const PetscInt *rip, *riip; 26575f44c854SHong Zhang PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 26580298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 26595f44c854SHong Zhang PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 26605f44c854SHong Zhang PetscReal fill = info->fill, levels = info->levels; 26610298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 26620298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 26635f44c854SHong Zhang PetscBT lnkbt; 26645f44c854SHong Zhang IS iperm; 26655f44c854SHong Zhang 26665f44c854SHong Zhang PetscFunctionBegin; 266708401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 26689566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &d)); 266928b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 26709566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 26719566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 26725f44c854SHong Zhang 26739f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui)); 26749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 26755f44c854SHong Zhang ui[0] = 0; 26765f44c854SHong Zhang 26775f44c854SHong Zhang /* ICC(0) without matrix ordering: simply copies fill pattern */ 26785f44c854SHong Zhang if (!levels && perm_identity) { 26795f44c854SHong Zhang for (i = 0; i < am; i++) { 26805f44c854SHong Zhang ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 26815f44c854SHong Zhang udiag[i] = ui[i]; 2682ed59401aSHong Zhang } 26839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 268439e3d630SHong Zhang cols = uj; 2685ed59401aSHong Zhang for (i = 0; i < am; i++) { 2686ed59401aSHong Zhang aj = a->j + a->diag[i]; 268739e3d630SHong Zhang ncols = ui[i + 1] - ui[i]; 268839e3d630SHong Zhang for (j = 0; j < ncols; j++) *cols++ = *aj++; 2689ed59401aSHong Zhang } 269039e3d630SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 26919566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 26929566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 2693b635d36bSHong Zhang 26947a48dd6fSHong Zhang /* initialization */ 26959566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ajtmp)); 26967a48dd6fSHong Zhang 26977a48dd6fSHong Zhang /* jl: linked list for storing indices of the pivot rows 26987a48dd6fSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 26999566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 27007a48dd6fSHong Zhang for (i = 0; i < am; i++) { 27019371c9d4SSatish Balay jl[i] = am; 27029371c9d4SSatish Balay il[i] = 0; 27037a48dd6fSHong Zhang } 27047a48dd6fSHong Zhang 27057a48dd6fSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 27067a48dd6fSHong Zhang nlnk = am + 1; 27079566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 27087a48dd6fSHong Zhang 27097a48dd6fSHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 27109566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 27117a48dd6fSHong Zhang current_space = free_space; 27129566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 27137a48dd6fSHong Zhang current_space_lvl = free_space_lvl; 27147a48dd6fSHong Zhang 27157a48dd6fSHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 27167a48dd6fSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 27177a48dd6fSHong Zhang nzk = 0; 2718622e2028SHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 271928b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2720622e2028SHong Zhang ncols_upper = 0; 2721622e2028SHong Zhang for (j = 0; j < ncols; j++) { 2722b635d36bSHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2723b635d36bSHong Zhang if (riip[i] >= k) { /* only take upper triangular entry */ 27245a8e39fbSHong Zhang ajtmp[ncols_upper] = i; 2725622e2028SHong Zhang ncols_upper++; 2726622e2028SHong Zhang } 2727622e2028SHong Zhang } 27289566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 27297a48dd6fSHong Zhang nzk += nlnk; 27307a48dd6fSHong Zhang 27317a48dd6fSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 27327a48dd6fSHong Zhang prow = jl[k]; /* 1st pivot row */ 27337a48dd6fSHong Zhang 27347a48dd6fSHong Zhang while (prow < k) { 27357a48dd6fSHong Zhang nextprow = jl[prow]; 27367a48dd6fSHong Zhang 27377a48dd6fSHong Zhang /* merge prow into k-th row */ 27387a48dd6fSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 27397a48dd6fSHong Zhang jmax = ui[prow + 1]; 27407a48dd6fSHong Zhang ncols = jmax - jmin; 2741ed59401aSHong Zhang i = jmin - ui[prow]; 2742ed59401aSHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 27435a8e39fbSHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 27445a8e39fbSHong Zhang j = *(uj - 1); 27459566063dSJacob Faibussowitsch PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 27467a48dd6fSHong Zhang nzk += nlnk; 27477a48dd6fSHong Zhang 27487a48dd6fSHong Zhang /* update il and jl for prow */ 27497a48dd6fSHong Zhang if (jmin < jmax) { 27507a48dd6fSHong Zhang il[prow] = jmin; 27519371c9d4SSatish Balay j = *cols; 27529371c9d4SSatish Balay jl[prow] = jl[j]; 27539371c9d4SSatish Balay jl[j] = prow; 27547a48dd6fSHong Zhang } 27557a48dd6fSHong Zhang prow = nextprow; 27567a48dd6fSHong Zhang } 27577a48dd6fSHong Zhang 27587a48dd6fSHong Zhang /* if free space is not available, make more free space */ 27597a48dd6fSHong Zhang if (current_space->local_remaining < nzk) { 27607a48dd6fSHong Zhang i = am - k + 1; /* num of unfactored rows */ 2761f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 27629566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 27639566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 27647a48dd6fSHong Zhang reallocs++; 27657a48dd6fSHong Zhang } 27667a48dd6fSHong Zhang 27677a48dd6fSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 276828b400f6SJacob Faibussowitsch PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 27699566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 27707a48dd6fSHong Zhang 27717a48dd6fSHong Zhang /* add the k-th row into il and jl */ 277239e3d630SHong Zhang if (nzk > 1) { 27737a48dd6fSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 27749371c9d4SSatish Balay jl[k] = jl[i]; 27759371c9d4SSatish Balay jl[i] = k; 27767a48dd6fSHong Zhang il[k] = ui[k] + 1; 27777a48dd6fSHong Zhang } 27787a48dd6fSHong Zhang uj_ptr[k] = current_space->array; 27797a48dd6fSHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 27807a48dd6fSHong Zhang 27817a48dd6fSHong Zhang current_space->array += nzk; 27827a48dd6fSHong Zhang current_space->local_used += nzk; 27837a48dd6fSHong Zhang current_space->local_remaining -= nzk; 27847a48dd6fSHong Zhang 27857a48dd6fSHong Zhang current_space_lvl->array += nzk; 27867a48dd6fSHong Zhang current_space_lvl->local_used += nzk; 27877a48dd6fSHong Zhang current_space_lvl->local_remaining -= nzk; 27887a48dd6fSHong Zhang 27897a48dd6fSHong Zhang ui[k + 1] = ui[k] + nzk; 27907a48dd6fSHong Zhang } 27917a48dd6fSHong Zhang 27926cf91177SBarry Smith #if defined(PETSC_USE_INFO) 27937a48dd6fSHong Zhang if (ai[am] != 0) { 279439e3d630SHong Zhang PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 27959566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 27969566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 27979566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 27987a48dd6fSHong Zhang } else { 27999566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 28007a48dd6fSHong Zhang } 280163ba0a88SBarry Smith #endif 28027a48dd6fSHong Zhang 28039566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 28049566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 28059566063dSJacob Faibussowitsch PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 28069566063dSJacob Faibussowitsch PetscCall(PetscFree(ajtmp)); 28077a48dd6fSHong Zhang 28087a48dd6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 28099f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj)); 28109566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 28119566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 28129566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 28137a48dd6fSHong Zhang 281439e3d630SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 281539e3d630SHong Zhang 28167a48dd6fSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 28177a48dd6fSHong Zhang 2818f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 28199f0612e4SBarry Smith b->free_ij = PETSC_TRUE; 28209f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a)); 28219f0612e4SBarry Smith b->free_a = PETSC_TRUE; 28227a48dd6fSHong Zhang b->j = uj; 28237a48dd6fSHong Zhang b->i = ui; 28245f44c854SHong Zhang b->diag = udiag; 28257f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 2826f4259b30SLisandro Dalcin b->ilen = NULL; 2827f4259b30SLisandro Dalcin b->imax = NULL; 28287a48dd6fSHong Zhang b->row = perm; 2829b635d36bSHong Zhang b->col = perm; 28302205254eSKarl Rupp 28319566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 28329566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 28332205254eSKarl Rupp 2834b635d36bSHong Zhang b->icol = iperm; 28357a48dd6fSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 28369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 28377a48dd6fSHong Zhang b->maxnz = b->nz = ui[am]; 28387a48dd6fSHong Zhang 2839f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2840f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 28417a48dd6fSHong Zhang if (ai[am] != 0) { 2842f284f12aSHong Zhang fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 28437a48dd6fSHong Zhang } else { 2844f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 28457a48dd6fSHong Zhang } 28460a3c351aSHong Zhang fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 28473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2848a6175056SHong Zhang } 2849ff6a9541SJacob Faibussowitsch #endif 2850d5d45c9bSBarry Smith 2851d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2852d71ae5a4SJacob Faibussowitsch { 28530be760fbSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 28540be760fbSHong Zhang Mat_SeqSBAIJ *b; 28559186b105SHong Zhang PetscBool perm_identity, missing; 28560be760fbSHong Zhang PetscReal fill = info->fill; 28570be760fbSHong Zhang const PetscInt *rip, *riip; 28580be760fbSHong Zhang PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 28590be760fbSHong Zhang PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 28600be760fbSHong Zhang PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 28610298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 28620be760fbSHong Zhang PetscBT lnkbt; 28630be760fbSHong Zhang IS iperm; 28640be760fbSHong Zhang 28650be760fbSHong Zhang PetscFunctionBegin; 286608401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 28679566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 286828b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 28699186b105SHong Zhang 28700be760fbSHong Zhang /* check whether perm is the identity mapping */ 28719566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 28729566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 28739566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 28749566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 28750be760fbSHong Zhang 28760be760fbSHong Zhang /* initialization */ 28779f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 28789566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 28790be760fbSHong Zhang ui[0] = 0; 28800be760fbSHong Zhang 28810be760fbSHong Zhang /* jl: linked list for storing indices of the pivot rows 28820be760fbSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 28839566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 28840be760fbSHong Zhang for (i = 0; i < am; i++) { 28859371c9d4SSatish Balay jl[i] = am; 28869371c9d4SSatish Balay il[i] = 0; 28870be760fbSHong Zhang } 28880be760fbSHong Zhang 28890be760fbSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 28900be760fbSHong Zhang nlnk = am + 1; 28919566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 28920be760fbSHong Zhang 289395b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 28949566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 28950be760fbSHong Zhang current_space = free_space; 28960be760fbSHong Zhang 28970be760fbSHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 28980be760fbSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 28990be760fbSHong Zhang nzk = 0; 29000be760fbSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 290128b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 29020be760fbSHong Zhang ncols_upper = 0; 29030be760fbSHong Zhang for (j = 0; j < ncols; j++) { 29040be760fbSHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 29050be760fbSHong Zhang if (i >= k) { /* only take upper triangular entry */ 29060be760fbSHong Zhang cols[ncols_upper] = i; 29070be760fbSHong Zhang ncols_upper++; 29080be760fbSHong Zhang } 29090be760fbSHong Zhang } 29109566063dSJacob Faibussowitsch PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 29110be760fbSHong Zhang nzk += nlnk; 29120be760fbSHong Zhang 29130be760fbSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 29140be760fbSHong Zhang prow = jl[k]; /* 1st pivot row */ 29150be760fbSHong Zhang 29160be760fbSHong Zhang while (prow < k) { 29170be760fbSHong Zhang nextprow = jl[prow]; 29180be760fbSHong Zhang /* merge prow into k-th row */ 29190be760fbSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 29200be760fbSHong Zhang jmax = ui[prow + 1]; 29210be760fbSHong Zhang ncols = jmax - jmin; 29220be760fbSHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 29239566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 29240be760fbSHong Zhang nzk += nlnk; 29250be760fbSHong Zhang 29260be760fbSHong Zhang /* update il and jl for prow */ 29270be760fbSHong Zhang if (jmin < jmax) { 29280be760fbSHong Zhang il[prow] = jmin; 29292205254eSKarl Rupp j = *uj_ptr; 29302205254eSKarl Rupp jl[prow] = jl[j]; 29312205254eSKarl Rupp jl[j] = prow; 29320be760fbSHong Zhang } 29330be760fbSHong Zhang prow = nextprow; 29340be760fbSHong Zhang } 29350be760fbSHong Zhang 29360be760fbSHong Zhang /* if free space is not available, make more free space */ 29370be760fbSHong Zhang if (current_space->local_remaining < nzk) { 29380be760fbSHong Zhang i = am - k + 1; /* num of unfactored rows */ 2939f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 29409566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 29410be760fbSHong Zhang reallocs++; 29420be760fbSHong Zhang } 29430be760fbSHong Zhang 29440be760fbSHong Zhang /* copy data into free space, then initialize lnk */ 29459566063dSJacob Faibussowitsch PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 29460be760fbSHong Zhang 29470be760fbSHong Zhang /* add the k-th row into il and jl */ 29487b056e98SHong Zhang if (nzk > 1) { 29490be760fbSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 29509371c9d4SSatish Balay jl[k] = jl[i]; 29519371c9d4SSatish Balay jl[i] = k; 29520be760fbSHong Zhang il[k] = ui[k] + 1; 29530be760fbSHong Zhang } 29540be760fbSHong Zhang ui_ptr[k] = current_space->array; 29552205254eSKarl Rupp 29560be760fbSHong Zhang current_space->array += nzk; 29570be760fbSHong Zhang current_space->local_used += nzk; 29580be760fbSHong Zhang current_space->local_remaining -= nzk; 29590be760fbSHong Zhang 29600be760fbSHong Zhang ui[k + 1] = ui[k] + nzk; 29610be760fbSHong Zhang } 29620be760fbSHong Zhang 29639566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 29649566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 29659566063dSJacob Faibussowitsch PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 29660be760fbSHong Zhang 29679263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2968*84648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj)); 29699566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 29709566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 29710be760fbSHong Zhang 29720be760fbSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 2973f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 29740be760fbSHong Zhang b->free_ij = PETSC_TRUE; 2975*84648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a)); 29769f0612e4SBarry Smith b->free_a = PETSC_TRUE; 29770be760fbSHong Zhang b->j = uj; 29780be760fbSHong Zhang b->i = ui; 29790be760fbSHong Zhang b->diag = udiag; 29800be760fbSHong Zhang b->free_diag = PETSC_TRUE; 2981f4259b30SLisandro Dalcin b->ilen = NULL; 2982f4259b30SLisandro Dalcin b->imax = NULL; 29830be760fbSHong Zhang b->row = perm; 29840be760fbSHong Zhang b->col = perm; 298526fbe8dcSKarl Rupp 29869566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 29879566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 29882205254eSKarl Rupp 29890be760fbSHong Zhang b->icol = iperm; 29900be760fbSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 299126fbe8dcSKarl Rupp 2992*84648c2dSPierre Jolivet PetscCall(PetscMalloc1(am, &b->solve_work)); 29932205254eSKarl Rupp 29940be760fbSHong Zhang b->maxnz = b->nz = ui[am]; 29950be760fbSHong Zhang 2996f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2997f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 29980be760fbSHong Zhang if (ai[am] != 0) { 29996a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 300095b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 30010be760fbSHong Zhang } else { 3002f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 30030be760fbSHong Zhang } 30049263d837SHong Zhang #if defined(PETSC_USE_INFO) 30059263d837SHong Zhang if (ai[am] != 0) { 30069263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 30079566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 30089566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 30099566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 30109263d837SHong Zhang } else { 30119566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 30129263d837SHong Zhang } 30139263d837SHong Zhang #endif 301435233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 30153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30160be760fbSHong Zhang } 30170be760fbSHong Zhang 3018ff6a9541SJacob Faibussowitsch #if 0 3019ff6a9541SJacob Faibussowitsch // unused 3020ff6a9541SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 3021d71ae5a4SJacob Faibussowitsch { 3022f76d2b81SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 302310c27e3dSHong Zhang Mat_SeqSBAIJ *b; 30249186b105SHong Zhang PetscBool perm_identity, missing; 302510c27e3dSHong Zhang PetscReal fill = info->fill; 30265d0c19d7SBarry Smith const PetscInt *rip, *riip; 30275d0c19d7SBarry Smith PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 302810c27e3dSHong Zhang PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 30292ed133dbSHong Zhang PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 30300298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 303110c27e3dSHong Zhang PetscBT lnkbt; 30322ed133dbSHong Zhang IS iperm; 3033f76d2b81SHong Zhang 3034f76d2b81SHong Zhang PetscFunctionBegin; 303508401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 30369566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 303728b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 30389186b105SHong Zhang 30392ed133dbSHong Zhang /* check whether perm is the identity mapping */ 30409566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 30419566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 30429566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 30439566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 304410c27e3dSHong Zhang 304510c27e3dSHong Zhang /* initialization */ 30469f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui)); 304710c27e3dSHong Zhang ui[0] = 0; 304810c27e3dSHong Zhang 304910c27e3dSHong Zhang /* jl: linked list for storing indices of the pivot rows 30501393bc97SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 30519566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 30521393bc97SHong Zhang for (i = 0; i < am; i++) { 30539371c9d4SSatish Balay jl[i] = am; 30549371c9d4SSatish Balay il[i] = 0; 3055f76d2b81SHong Zhang } 305610c27e3dSHong Zhang 305710c27e3dSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 30581393bc97SHong Zhang nlnk = am + 1; 30599566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 306010c27e3dSHong Zhang 30611393bc97SHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 30629566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 306310c27e3dSHong Zhang current_space = free_space; 306410c27e3dSHong Zhang 30651393bc97SHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 306610c27e3dSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 306710c27e3dSHong Zhang nzk = 0; 30682ed133dbSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 306994bad497SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 30702ed133dbSHong Zhang ncols_upper = 0; 30712ed133dbSHong Zhang for (j = 0; j < ncols; j++) { 30729bfd6278SHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 30732ed133dbSHong Zhang if (i >= k) { /* only take upper triangular entry */ 30742ed133dbSHong Zhang cols[ncols_upper] = i; 30752ed133dbSHong Zhang ncols_upper++; 30762ed133dbSHong Zhang } 30772ed133dbSHong Zhang } 30789566063dSJacob Faibussowitsch PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 307910c27e3dSHong Zhang nzk += nlnk; 308010c27e3dSHong Zhang 308110c27e3dSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 308210c27e3dSHong Zhang prow = jl[k]; /* 1st pivot row */ 308310c27e3dSHong Zhang 308410c27e3dSHong Zhang while (prow < k) { 308510c27e3dSHong Zhang nextprow = jl[prow]; 308610c27e3dSHong Zhang /* merge prow into k-th row */ 30871393bc97SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 308810c27e3dSHong Zhang jmax = ui[prow + 1]; 308910c27e3dSHong Zhang ncols = jmax - jmin; 30901393bc97SHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 30919566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 309210c27e3dSHong Zhang nzk += nlnk; 309310c27e3dSHong Zhang 309410c27e3dSHong Zhang /* update il and jl for prow */ 309510c27e3dSHong Zhang if (jmin < jmax) { 309610c27e3dSHong Zhang il[prow] = jmin; 30979371c9d4SSatish Balay j = *uj_ptr; 30989371c9d4SSatish Balay jl[prow] = jl[j]; 30999371c9d4SSatish Balay jl[j] = prow; 310010c27e3dSHong Zhang } 310110c27e3dSHong Zhang prow = nextprow; 310210c27e3dSHong Zhang } 310310c27e3dSHong Zhang 310410c27e3dSHong Zhang /* if free space is not available, make more free space */ 310510c27e3dSHong Zhang if (current_space->local_remaining < nzk) { 31061393bc97SHong Zhang i = am - k + 1; /* num of unfactored rows */ 310710c27e3dSHong Zhang i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 31089566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 310910c27e3dSHong Zhang reallocs++; 311010c27e3dSHong Zhang } 311110c27e3dSHong Zhang 311210c27e3dSHong Zhang /* copy data into free space, then initialize lnk */ 31139566063dSJacob Faibussowitsch PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 311410c27e3dSHong Zhang 311510c27e3dSHong Zhang /* add the k-th row into il and jl */ 311610c27e3dSHong Zhang if (nzk - 1 > 0) { 31171393bc97SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 31189371c9d4SSatish Balay jl[k] = jl[i]; 31199371c9d4SSatish Balay jl[i] = k; 312010c27e3dSHong Zhang il[k] = ui[k] + 1; 312110c27e3dSHong Zhang } 31222ed133dbSHong Zhang ui_ptr[k] = current_space->array; 31232205254eSKarl Rupp 312410c27e3dSHong Zhang current_space->array += nzk; 312510c27e3dSHong Zhang current_space->local_used += nzk; 312610c27e3dSHong Zhang current_space->local_remaining -= nzk; 312710c27e3dSHong Zhang 312810c27e3dSHong Zhang ui[k + 1] = ui[k] + nzk; 312910c27e3dSHong Zhang } 313010c27e3dSHong Zhang 31316cf91177SBarry Smith #if defined(PETSC_USE_INFO) 31321393bc97SHong Zhang if (ai[am] != 0) { 31334ad8454bSPierre Jolivet PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am]; 31349566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 31359566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 31369566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 313710c27e3dSHong Zhang } else { 31389566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 313910c27e3dSHong Zhang } 314063ba0a88SBarry Smith #endif 314110c27e3dSHong Zhang 31429566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 31439566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 31449566063dSJacob Faibussowitsch PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 314510c27e3dSHong Zhang 314610c27e3dSHong Zhang /* destroy list of free space and other temporary array(s) */ 31479f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj)); 31489566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 31499566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 315010c27e3dSHong Zhang 315110c27e3dSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 3152f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 3153e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 31549f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a)); 31559f0612e4SBarry Smith b->free_a = PETSC_TRUE; 315610c27e3dSHong Zhang b->j = uj; 315710c27e3dSHong Zhang b->i = ui; 3158f4259b30SLisandro Dalcin b->diag = NULL; 3159f4259b30SLisandro Dalcin b->ilen = NULL; 3160f4259b30SLisandro Dalcin b->imax = NULL; 316110c27e3dSHong Zhang b->row = perm; 31629bfd6278SHong Zhang b->col = perm; 31632205254eSKarl Rupp 31649566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 31659566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 31662205254eSKarl Rupp 31679bfd6278SHong Zhang b->icol = iperm; 316810c27e3dSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 31692205254eSKarl Rupp 31709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 31711393bc97SHong Zhang b->maxnz = b->nz = ui[am]; 317210c27e3dSHong Zhang 3173f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 3174f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 31751393bc97SHong Zhang if (ai[am] != 0) { 31764ad8454bSPierre Jolivet fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am]; 317710c27e3dSHong Zhang } else { 3178f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 317910c27e3dSHong Zhang } 31800a3c351aSHong Zhang fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 31813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3182f76d2b81SHong Zhang } 3183ff6a9541SJacob Faibussowitsch #endif 3184599ef60dSHong Zhang 3185d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 3186d71ae5a4SJacob Faibussowitsch { 3187813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3188813a1f2bSShri Abhyankar PetscInt n = A->rmap->n; 3189813a1f2bSShri Abhyankar const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3190813a1f2bSShri Abhyankar PetscScalar *x, sum; 3191813a1f2bSShri Abhyankar const PetscScalar *b; 3192b65878eeSJunchao Zhang const MatScalar *aa, *v; 3193813a1f2bSShri Abhyankar PetscInt i, nz; 3194813a1f2bSShri Abhyankar 3195813a1f2bSShri Abhyankar PetscFunctionBegin; 31963ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3197813a1f2bSShri Abhyankar 3198b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 31999566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 32009566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 3201813a1f2bSShri Abhyankar 3202813a1f2bSShri Abhyankar /* forward solve the lower triangular */ 3203813a1f2bSShri Abhyankar x[0] = b[0]; 3204813a1f2bSShri Abhyankar v = aa; 3205813a1f2bSShri Abhyankar vi = aj; 3206813a1f2bSShri Abhyankar for (i = 1; i < n; i++) { 3207813a1f2bSShri Abhyankar nz = ai[i + 1] - ai[i]; 3208813a1f2bSShri Abhyankar sum = b[i]; 3209813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3210813a1f2bSShri Abhyankar v += nz; 3211813a1f2bSShri Abhyankar vi += nz; 3212813a1f2bSShri Abhyankar x[i] = sum; 3213813a1f2bSShri Abhyankar } 3214813a1f2bSShri Abhyankar 3215813a1f2bSShri Abhyankar /* backward solve the upper triangular */ 321662fbd6afSShri Abhyankar for (i = n - 1; i >= 0; i--) { 32173c0119dfSShri Abhyankar v = aa + adiag[i + 1] + 1; 32183c0119dfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 3219813a1f2bSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 3220813a1f2bSShri Abhyankar sum = x[i]; 3221813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum, x, v, vi, nz); 32223c0119dfSShri Abhyankar x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3223813a1f2bSShri Abhyankar } 3224813a1f2bSShri Abhyankar 32259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3226b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 32279566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 32289566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 32293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3230813a1f2bSShri Abhyankar } 3231813a1f2bSShri Abhyankar 3232d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 3233d71ae5a4SJacob Faibussowitsch { 3234f268cba8SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3235f268cba8SShri Abhyankar IS iscol = a->col, isrow = a->row; 3236f268cba8SShri Abhyankar PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3237f268cba8SShri Abhyankar const PetscInt *rout, *cout, *r, *c; 3238f268cba8SShri Abhyankar PetscScalar *x, *tmp, sum; 3239f268cba8SShri Abhyankar const PetscScalar *b; 3240b65878eeSJunchao Zhang const MatScalar *aa, *v; 3241f268cba8SShri Abhyankar 3242f268cba8SShri Abhyankar PetscFunctionBegin; 32433ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3244f268cba8SShri Abhyankar 3245b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 32469566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 32479566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 3248f268cba8SShri Abhyankar tmp = a->solve_work; 3249f268cba8SShri Abhyankar 32509371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 32519371c9d4SSatish Balay r = rout; 32529371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 32539371c9d4SSatish Balay c = cout; 3254f268cba8SShri Abhyankar 3255f268cba8SShri Abhyankar /* forward solve the lower triangular */ 3256f268cba8SShri Abhyankar tmp[0] = b[r[0]]; 3257f268cba8SShri Abhyankar v = aa; 3258f268cba8SShri Abhyankar vi = aj; 3259f268cba8SShri Abhyankar for (i = 1; i < n; i++) { 3260f268cba8SShri Abhyankar nz = ai[i + 1] - ai[i]; 3261f268cba8SShri Abhyankar sum = b[r[i]]; 3262f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3263f268cba8SShri Abhyankar tmp[i] = sum; 32649371c9d4SSatish Balay v += nz; 32659371c9d4SSatish Balay vi += nz; 3266f268cba8SShri Abhyankar } 3267f268cba8SShri Abhyankar 3268f268cba8SShri Abhyankar /* backward solve the upper triangular */ 3269f268cba8SShri Abhyankar for (i = n - 1; i >= 0; i--) { 32703c0119dfSShri Abhyankar v = aa + adiag[i + 1] + 1; 32713c0119dfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 3272f268cba8SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 3273f268cba8SShri Abhyankar sum = tmp[i]; 3274f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3275f268cba8SShri Abhyankar x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3276f268cba8SShri Abhyankar } 3277f268cba8SShri Abhyankar 32789566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 32799566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 3280b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 32819566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 32829566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 32839566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 32843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3285f268cba8SShri Abhyankar } 3286f268cba8SShri Abhyankar 3287ff6a9541SJacob Faibussowitsch #if 0 3288ff6a9541SJacob Faibussowitsch // unused 3289fe97e370SBarry Smith /* 32906aad120cSJose E. Roman This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3291fe97e370SBarry Smith */ 3292ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) 3293d71ae5a4SJacob Faibussowitsch { 32941d098868SHong Zhang Mat B = *fact; 3295599ef60dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3296599ef60dSHong Zhang IS isicol; 32971d098868SHong Zhang const PetscInt *r, *ic; 32981d098868SHong Zhang PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3299dc3a2fd3SHong Zhang PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3300f61a948aSLisandro Dalcin PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 33011d098868SHong Zhang PetscInt nlnk, *lnk; 33021d098868SHong Zhang PetscBT lnkbt; 3303ace3abfcSBarry Smith PetscBool row_identity, icol_identity; 33048aee2decSHong Zhang MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 33051d098868SHong Zhang const PetscInt *ics; 33061d098868SHong Zhang PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3307a2ea699eSBarry Smith PetscReal dt = info->dt, shift = info->shiftamount; 3308f61a948aSLisandro Dalcin PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3309ace3abfcSBarry Smith PetscBool missing; 3310599ef60dSHong Zhang 3311599ef60dSHong Zhang PetscFunctionBegin; 331213bcc0bdSJacob Faibussowitsch if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005; 3313f61a948aSLisandro Dalcin if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3314f61a948aSLisandro Dalcin 33152ef1f0ffSBarry Smith /* symbolic factorization, can be reused */ 33169566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 331728b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 33181d098868SHong Zhang adiag = a->diag; 3319599ef60dSHong Zhang 33209566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3321599ef60dSHong Zhang 3322599ef60dSHong Zhang /* bdiag is location of diagonal in factor */ 33239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 33249566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3325599ef60dSHong Zhang 33261d098868SHong Zhang /* allocate row pointers bi */ 33279f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(2 * n + 2,sizeof(PetscInt),(void **)&bi)); 33281d098868SHong Zhang 33291d098868SHong Zhang /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3330393d3a68SHong Zhang if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 33311d098868SHong Zhang nnz_max = ai[n] + 2 * n * dtcount + 2; 33328fc3a347SHong Zhang 33339f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscInt),(void **)&bj)); 33349f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscScalar),(void **)&ba)); 3335599ef60dSHong Zhang 3336599ef60dSHong Zhang /* put together the new matrix */ 33379566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3338f284f12aSHong Zhang b = (Mat_SeqAIJ *)B->data; 3339599ef60dSHong Zhang b->free_a = PETSC_TRUE; 3340599ef60dSHong Zhang b->free_ij = PETSC_TRUE; 3341599ef60dSHong Zhang b->a = ba; 3342599ef60dSHong Zhang b->j = bj; 3343599ef60dSHong Zhang b->i = bi; 3344599ef60dSHong Zhang b->diag = bdiag; 3345f4259b30SLisandro Dalcin b->ilen = NULL; 3346f4259b30SLisandro Dalcin b->imax = NULL; 3347599ef60dSHong Zhang b->row = isrow; 3348599ef60dSHong Zhang b->col = iscol; 33499566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 33509566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 3351599ef60dSHong Zhang b->icol = isicol; 3352599ef60dSHong Zhang 33539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 33541d098868SHong Zhang b->maxnz = nnz_max; 3355599ef60dSHong Zhang 3356d5f3da31SBarry Smith B->factortype = MAT_FACTOR_ILUDT; 3357f284f12aSHong Zhang B->info.factor_mallocs = 0; 3358f284f12aSHong Zhang B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 33592ef1f0ffSBarry Smith /* end of symbolic factorization */ 3360599ef60dSHong Zhang 33619566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 33629566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 3363599ef60dSHong Zhang ics = ic; 3364599ef60dSHong Zhang 3365599ef60dSHong Zhang /* linked list for storing column indices of the active row */ 3366599ef60dSHong Zhang nlnk = n + 1; 33679566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 33681d098868SHong Zhang 33691d098868SHong Zhang /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 33709566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 33711d098868SHong Zhang /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 33729566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 33739566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, n)); 3374599ef60dSHong Zhang 3375599ef60dSHong Zhang bi[0] = 0; 33768fc3a347SHong Zhang bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3377dc3a2fd3SHong Zhang bdiag_rev[n] = bdiag[0]; 33788fc3a347SHong Zhang bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3379599ef60dSHong Zhang for (i = 0; i < n; i++) { 3380599ef60dSHong Zhang /* copy initial fill into linked list */ 33818369b28dSHong Zhang nzi = ai[r[i] + 1] - ai[r[i]]; 338294bad497SJacob Faibussowitsch PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 33838369b28dSHong Zhang nzi_al = adiag[r[i]] - ai[r[i]]; 33848369b28dSHong Zhang nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3385599ef60dSHong Zhang ajtmp = aj + ai[r[i]]; 33869566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3387599ef60dSHong Zhang 3388599ef60dSHong Zhang /* load in initial (unfactored row) */ 33891d098868SHong Zhang aatmp = a->a + ai[r[i]]; 3390ad540459SPierre Jolivet for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++; 3391599ef60dSHong Zhang 3392599ef60dSHong Zhang /* add pivot rows into linked list */ 3393599ef60dSHong Zhang row = lnk[n]; 3394599ef60dSHong Zhang while (row < i) { 33951d098868SHong Zhang nzi_bl = bi[row + 1] - bi[row] + 1; 33961d098868SHong Zhang bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 33979566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3398599ef60dSHong Zhang nzi += nlnk; 3399599ef60dSHong Zhang row = lnk[row]; 3400599ef60dSHong Zhang } 3401599ef60dSHong Zhang 34028369b28dSHong Zhang /* copy data from lnk into jtmp, then initialize lnk */ 34039566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3404599ef60dSHong Zhang 3405599ef60dSHong Zhang /* numerical factorization */ 34068369b28dSHong Zhang bjtmp = jtmp; 3407599ef60dSHong Zhang row = *bjtmp++; /* 1st pivot row */ 3408599ef60dSHong Zhang while (row < i) { 3409599ef60dSHong Zhang pc = rtmp + row; 34103c2a7987SHong Zhang pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 34113c2a7987SHong Zhang multiplier = (*pc) * (*pv); 3412599ef60dSHong Zhang *pc = multiplier; 34133c2a7987SHong Zhang if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 34141d098868SHong Zhang pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 34151d098868SHong Zhang pv = ba + bdiag[row + 1] + 1; 34161d098868SHong Zhang nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 34171d098868SHong Zhang for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 34189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3419599ef60dSHong Zhang } 3420599ef60dSHong Zhang row = *bjtmp++; 3421599ef60dSHong Zhang } 3422599ef60dSHong Zhang 34238369b28dSHong Zhang /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 34248aee2decSHong Zhang diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 34259371c9d4SSatish Balay nzi_bl = 0; 34269371c9d4SSatish Balay j = 0; 34278369b28dSHong Zhang while (jtmp[j] < i) { /* Note: jtmp is sorted */ 34289371c9d4SSatish Balay vtmp[j] = rtmp[jtmp[j]]; 34299371c9d4SSatish Balay rtmp[jtmp[j]] = 0.0; 34309371c9d4SSatish Balay nzi_bl++; 34319371c9d4SSatish Balay j++; 34328369b28dSHong Zhang } 34338369b28dSHong Zhang nzi_bu = nzi - nzi_bl - 1; 34348369b28dSHong Zhang while (j < nzi) { 34359371c9d4SSatish Balay vtmp[j] = rtmp[jtmp[j]]; 34369371c9d4SSatish Balay rtmp[jtmp[j]] = 0.0; 34378369b28dSHong Zhang j++; 34388369b28dSHong Zhang } 34398369b28dSHong Zhang 3440599ef60dSHong Zhang bjtmp = bj + bi[i]; 3441599ef60dSHong Zhang batmp = ba + bi[i]; 34428369b28dSHong Zhang /* apply level dropping rule to L part */ 34438369b28dSHong Zhang ncut = nzi_al + dtcount; 34448369b28dSHong Zhang if (ncut < nzi_bl) { 34459566063dSJacob Faibussowitsch PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 34469566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3447599ef60dSHong Zhang } else { 34488369b28dSHong Zhang ncut = nzi_bl; 34498369b28dSHong Zhang } 34508369b28dSHong Zhang for (j = 0; j < ncut; j++) { 34518369b28dSHong Zhang bjtmp[j] = jtmp[j]; 34528369b28dSHong Zhang batmp[j] = vtmp[j]; 34538369b28dSHong Zhang } 34541d098868SHong Zhang bi[i + 1] = bi[i] + ncut; 34558369b28dSHong Zhang nzi = ncut + 1; 34568369b28dSHong Zhang 34578369b28dSHong Zhang /* apply level dropping rule to U part */ 34588369b28dSHong Zhang ncut = nzi_au + dtcount; 34598369b28dSHong Zhang if (ncut < nzi_bu) { 34609566063dSJacob Faibussowitsch PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 34619566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 34628369b28dSHong Zhang } else { 34638369b28dSHong Zhang ncut = nzi_bu; 34648369b28dSHong Zhang } 34658369b28dSHong Zhang nzi += ncut; 34661d098868SHong Zhang 34671d098868SHong Zhang /* mark bdiagonal */ 34681d098868SHong Zhang bdiag[i + 1] = bdiag[i] - (ncut + 1); 3469dc3a2fd3SHong Zhang bdiag_rev[n - i - 1] = bdiag[i + 1]; 34708fc3a347SHong Zhang bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 34711d098868SHong Zhang bjtmp = bj + bdiag[i]; 34721d098868SHong Zhang batmp = ba + bdiag[i]; 34731d098868SHong Zhang *bjtmp = i; 34748aee2decSHong Zhang *batmp = diag_tmp; /* rtmp[i]; */ 3475ad540459SPierre Jolivet if (*batmp == 0.0) *batmp = dt + shift; 3476a5b23f4aSJose E. Roman *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 34771d098868SHong Zhang 34781d098868SHong Zhang bjtmp = bj + bdiag[i + 1] + 1; 34791d098868SHong Zhang batmp = ba + bdiag[i + 1] + 1; 34808369b28dSHong Zhang for (k = 0; k < ncut; k++) { 34811d098868SHong Zhang bjtmp[k] = jtmp[nzi_bl + 1 + k]; 34821d098868SHong Zhang batmp[k] = vtmp[nzi_bl + 1 + k]; 3483599ef60dSHong Zhang } 3484599ef60dSHong Zhang 3485599ef60dSHong Zhang im[i] = nzi; /* used by PetscLLAddSortedLU() */ 34868369b28dSHong Zhang } /* for (i=0; i<n; i++) */ 348763a3b9bcSJacob Faibussowitsch PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3488599ef60dSHong Zhang 34899566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 34909566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 3491599ef60dSHong Zhang 34929566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 34939566063dSJacob Faibussowitsch PetscCall(PetscFree2(im, jtmp)); 34949566063dSJacob Faibussowitsch PetscCall(PetscFree2(rtmp, vtmp)); 34959566063dSJacob Faibussowitsch PetscCall(PetscFree(bdiag_rev)); 3496599ef60dSHong Zhang 34979566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n)); 34981d098868SHong Zhang b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3499599ef60dSHong Zhang 35009566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 35019566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &icol_identity)); 3502599ef60dSHong Zhang if (row_identity && icol_identity) { 350335233d99SShri Abhyankar B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3504599ef60dSHong Zhang } else { 350535233d99SShri Abhyankar B->ops->solve = MatSolve_SeqAIJ; 3506599ef60dSHong Zhang } 3507599ef60dSHong Zhang 3508f4259b30SLisandro Dalcin B->ops->solveadd = NULL; 3509f4259b30SLisandro Dalcin B->ops->solvetranspose = NULL; 3510f4259b30SLisandro Dalcin B->ops->solvetransposeadd = NULL; 3511f4259b30SLisandro Dalcin B->ops->matsolve = NULL; 3512599ef60dSHong Zhang B->assembled = PETSC_TRUE; 3513599ef60dSHong Zhang B->preallocated = PETSC_TRUE; 35143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3515599ef60dSHong Zhang } 3516599ef60dSHong Zhang 3517da81f932SPierre Jolivet /* a wrapper of MatILUDTFactor_SeqAIJ() */ 3518fe97e370SBarry Smith /* 35196aad120cSJose E. Roman This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3520fe97e370SBarry Smith */ 3521fe97e370SBarry Smith 3522ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 3523d71ae5a4SJacob Faibussowitsch { 35243c2a7987SHong Zhang PetscFunctionBegin; 35259566063dSJacob Faibussowitsch PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 35263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 35273c2a7987SHong Zhang } 35283c2a7987SHong Zhang 35293c2a7987SHong Zhang /* 35303c2a7987SHong Zhang same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 35313c2a7987SHong Zhang - intend to replace existing MatLUFactorNumeric_SeqAIJ() 35323c2a7987SHong Zhang */ 3533fe97e370SBarry Smith /* 35346aad120cSJose E. Roman This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3535fe97e370SBarry Smith */ 3536fe97e370SBarry Smith 3537ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) 3538d71ae5a4SJacob Faibussowitsch { 35393c2a7987SHong Zhang Mat C = fact; 35403c2a7987SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 35413c2a7987SHong Zhang IS isrow = b->row, isicol = b->icol; 35423c2a7987SHong Zhang const PetscInt *r, *ic, *ics; 3543b78a477dSHong Zhang PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3544b78a477dSHong Zhang PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 35453c2a7987SHong Zhang MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3546182b8fbaSHong Zhang PetscReal dt = info->dt, shift = info->shiftamount; 3547ace3abfcSBarry Smith PetscBool row_identity, col_identity; 35483c2a7987SHong Zhang 35493c2a7987SHong Zhang PetscFunctionBegin; 35509566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 35519566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 35529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 35533c2a7987SHong Zhang ics = ic; 35543c2a7987SHong Zhang 35553c2a7987SHong Zhang for (i = 0; i < n; i++) { 3556b78a477dSHong Zhang /* initialize rtmp array */ 3557bbea24aaSStefano Zampini nzl = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */ 35583c2a7987SHong Zhang bjtmp = bj + bi[i]; 3559b78a477dSHong Zhang for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3560b78a477dSHong Zhang rtmp[i] = 0.0; 3561bbea24aaSStefano Zampini nzu = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */ 3562b78a477dSHong Zhang bjtmp = bj + bdiag[i + 1] + 1; 3563b78a477dSHong Zhang for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 35643c2a7987SHong Zhang 3565b78a477dSHong Zhang /* load in initial unfactored row of A */ 35663c2a7987SHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 35673c2a7987SHong Zhang ajtmp = aj + ai[r[i]]; 35683c2a7987SHong Zhang v = aa + ai[r[i]]; 3569ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j]; 35703c2a7987SHong Zhang 3571b78a477dSHong Zhang /* numerical factorization */ 3572b78a477dSHong Zhang bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3573b78a477dSHong Zhang nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3574b78a477dSHong Zhang k = 0; 3575b78a477dSHong Zhang while (k < nzl) { 35763c2a7987SHong Zhang row = *bjtmp++; 35773c2a7987SHong Zhang pc = rtmp + row; 3578b78a477dSHong Zhang pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3579b78a477dSHong Zhang multiplier = (*pc) * (*pv); 35803c2a7987SHong Zhang *pc = multiplier; 3581b78a477dSHong Zhang if (PetscAbsScalar(multiplier) > dt) { 3582b78a477dSHong Zhang pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3583b78a477dSHong Zhang pv = b->a + bdiag[row + 1] + 1; 3584b78a477dSHong Zhang nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3585b78a477dSHong Zhang for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 35869566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 35873c2a7987SHong Zhang } 3588b78a477dSHong Zhang k++; 35893c2a7987SHong Zhang } 35903c2a7987SHong Zhang 3591b78a477dSHong Zhang /* finished row so stick it into b->a */ 3592b78a477dSHong Zhang /* L-part */ 3593b78a477dSHong Zhang pv = b->a + bi[i]; 3594b78a477dSHong Zhang pj = bj + bi[i]; 3595b78a477dSHong Zhang nzl = bi[i + 1] - bi[i]; 3596ad540459SPierre Jolivet for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]]; 3597b78a477dSHong Zhang 3598a5b23f4aSJose E. Roman /* diagonal: invert diagonal entries for simpler triangular solves */ 3599b78a477dSHong Zhang if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3600b78a477dSHong Zhang b->a[bdiag[i]] = 1.0 / rtmp[i]; 3601b78a477dSHong Zhang 3602b78a477dSHong Zhang /* U-part */ 3603b78a477dSHong Zhang pv = b->a + bdiag[i + 1] + 1; 3604b78a477dSHong Zhang pj = bj + bdiag[i + 1] + 1; 3605b78a477dSHong Zhang nzu = bdiag[i] - bdiag[i + 1] - 1; 3606ad540459SPierre Jolivet for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]]; 3607b78a477dSHong Zhang } 3608b78a477dSHong Zhang 36099566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 36109566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 36119566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 3612b78a477dSHong Zhang 36139566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 36149566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 36153c2a7987SHong Zhang if (row_identity && col_identity) { 361635233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 36173c2a7987SHong Zhang } else { 361835233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ; 36193c2a7987SHong Zhang } 3620f4259b30SLisandro Dalcin C->ops->solveadd = NULL; 3621f4259b30SLisandro Dalcin C->ops->solvetranspose = NULL; 3622f4259b30SLisandro Dalcin C->ops->solvetransposeadd = NULL; 3623f4259b30SLisandro Dalcin C->ops->matsolve = NULL; 36243c2a7987SHong Zhang C->assembled = PETSC_TRUE; 36253c2a7987SHong Zhang C->preallocated = PETSC_TRUE; 36262205254eSKarl Rupp 36279566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 36283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 36293c2a7987SHong Zhang } 3630ff6a9541SJacob Faibussowitsch #endif 3631