1c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 3c6db04a5SJed Brown #include <petscbt.h> 4c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 53b2fbd54SBarry Smith 6d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 7d71ae5a4SJacob Faibussowitsch { 84ac6704cSBarry Smith PetscFunctionBegin; 94ac6704cSBarry Smith *type = MATSOLVERPETSC; 103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 114ac6704cSBarry Smith } 124ac6704cSBarry Smith 13d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 14d71ae5a4SJacob Faibussowitsch { 15d0f46423SBarry Smith PetscInt n = A->rmap->n; 16b24902e0SBarry Smith 17b24902e0SBarry Smith PetscFunctionBegin; 18*fc2fb351SPierre Jolivet if (PetscDefined(USE_COMPLEX) && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 1903e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n")); 2003e5aca4SStefano Zampini *B = NULL; 2103e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 2203e5aca4SStefano Zampini } 2303e5aca4SStefano Zampini 249566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 259566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, n, n, n, n)); 26599ef60dSHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 279566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQAIJ)); 282205254eSKarl Rupp 2935233d99SShri Abhyankar (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 3035233d99SShri Abhyankar (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 312205254eSKarl Rupp 329566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 339566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 349566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 359566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 36b24902e0SBarry Smith } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 379566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQSBAIJ)); 389566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 392205254eSKarl Rupp 4035233d99SShri Abhyankar (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 4135233d99SShri Abhyankar (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 429566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 439566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 44e32f2f54SBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 45d5f3da31SBarry Smith (*B)->factortype = ftype; 4600c67f3bSHong Zhang 479566063dSJacob Faibussowitsch PetscCall(PetscFree((*B)->solvertype)); 489566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 49f73b0415SBarry Smith (*B)->canuseordering = PETSC_TRUE; 509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 52b24902e0SBarry Smith } 53b24902e0SBarry Smith 54d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 55d71ae5a4SJacob Faibussowitsch { 56ce3d78c0SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 57ce3d78c0SShri Abhyankar IS isicol; 588758e1faSBarry Smith const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 598758e1faSBarry Smith PetscInt i, n = A->rmap->n; 608758e1faSBarry Smith PetscInt *bi, *bj; 61ce3d78c0SShri Abhyankar PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 62ce3d78c0SShri Abhyankar PetscReal f; 63ce3d78c0SShri Abhyankar PetscInt nlnk, *lnk, k, **bi_ptr; 640298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 65ce3d78c0SShri Abhyankar PetscBT lnkbt; 66421480d9SBarry Smith PetscBool diagDense; 67ce3d78c0SShri Abhyankar 68ce3d78c0SShri Abhyankar PetscFunctionBegin; 6908401ef6SPierre Jolivet PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 70421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense)); 71421480d9SBarry Smith PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 72ece542f9SHong Zhang 739566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 749566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 759566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 76ce3d78c0SShri Abhyankar 771bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 789f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 80a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 819b48462bSShri Abhyankar 829b48462bSShri Abhyankar /* linked list for storing column indices of the active row */ 839b48462bSShri Abhyankar nlnk = n + 1; 849566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 859b48462bSShri Abhyankar 869566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 879b48462bSShri Abhyankar 889b48462bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 899b48462bSShri Abhyankar f = info->fill; 90aa91b3e7SRichard Tran Mills if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 919566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 929b48462bSShri Abhyankar current_space = free_space; 939b48462bSShri Abhyankar 949b48462bSShri Abhyankar for (i = 0; i < n; i++) { 959b48462bSShri Abhyankar /* copy previous fill into linked list */ 969b48462bSShri Abhyankar nzi = 0; 979b48462bSShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 989b48462bSShri Abhyankar ajtmp = aj + ai[r[i]]; 999566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 1009b48462bSShri Abhyankar nzi += nlnk; 1019b48462bSShri Abhyankar 1029b48462bSShri Abhyankar /* add pivot rows into linked list */ 1039b48462bSShri Abhyankar row = lnk[n]; 1049b48462bSShri Abhyankar while (row < i) { 1059b48462bSShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 1069b48462bSShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 1079566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 1089b48462bSShri Abhyankar nzi += nlnk; 1099b48462bSShri Abhyankar row = lnk[row]; 1109b48462bSShri Abhyankar } 1119b48462bSShri Abhyankar bi[i + 1] = bi[i] + nzi; 1129b48462bSShri Abhyankar im[i] = nzi; 1139b48462bSShri Abhyankar 1149b48462bSShri Abhyankar /* mark bdiag */ 1159b48462bSShri Abhyankar nzbd = 0; 1169b48462bSShri Abhyankar nnz = nzi; 1179b48462bSShri Abhyankar k = lnk[n]; 1189b48462bSShri Abhyankar while (nnz-- && k < i) { 1199b48462bSShri Abhyankar nzbd++; 1209b48462bSShri Abhyankar k = lnk[k]; 1219b48462bSShri Abhyankar } 1229b48462bSShri Abhyankar bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 1239b48462bSShri Abhyankar 1249b48462bSShri Abhyankar /* if free space is not available, make more free space */ 1259b48462bSShri Abhyankar if (current_space->local_remaining < nzi) { 1262f18eb33SBarry Smith /* estimated additional space needed */ 1271df4278eSBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 1289566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1299b48462bSShri Abhyankar reallocs++; 1309b48462bSShri Abhyankar } 1319b48462bSShri Abhyankar 1329b48462bSShri Abhyankar /* copy data into free space, then initialize lnk */ 1339566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 1342205254eSKarl Rupp 1359b48462bSShri Abhyankar bi_ptr[i] = current_space->array; 1369b48462bSShri Abhyankar current_space->array += nzi; 1379b48462bSShri Abhyankar current_space->local_used += nzi; 1389b48462bSShri Abhyankar current_space->local_remaining -= nzi; 1399b48462bSShri Abhyankar } 1409b48462bSShri Abhyankar 1419566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 1429566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 1439b48462bSShri Abhyankar 1449263d837SHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 14584648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj)); 1469566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1479566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 1489566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 1499b48462bSShri Abhyankar 1509b48462bSShri Abhyankar /* put together the new matrix */ 1519566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 15257508eceSPierre Jolivet b = (Mat_SeqAIJ *)B->data; 1539b48462bSShri Abhyankar b->free_ij = PETSC_TRUE; 1549f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 1559f0612e4SBarry Smith b->free_a = PETSC_TRUE; 1569b48462bSShri Abhyankar b->j = bj; 1579b48462bSShri Abhyankar b->i = bi; 1589b48462bSShri Abhyankar b->diag = bdiag; 159f4259b30SLisandro Dalcin b->ilen = NULL; 160f4259b30SLisandro Dalcin b->imax = NULL; 1619b48462bSShri Abhyankar b->row = isrow; 1629b48462bSShri Abhyankar b->col = iscol; 1639566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 1649566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 1659b48462bSShri Abhyankar b->icol = isicol; 16684648c2dSPierre Jolivet PetscCall(PetscMalloc1(n, &b->solve_work)); 1679b48462bSShri Abhyankar 1689b48462bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 1699b48462bSShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 1702205254eSKarl Rupp 171d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 172f284f12aSHong Zhang B->info.factor_mallocs = reallocs; 173f284f12aSHong Zhang B->info.fill_ratio_given = f; 1749b48462bSShri Abhyankar 1759b48462bSShri Abhyankar if (ai[n]) { 176f284f12aSHong Zhang B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1779b48462bSShri Abhyankar } else { 178f284f12aSHong Zhang B->info.fill_ratio_needed = 0.0; 1799b48462bSShri Abhyankar } 1809263d837SHong Zhang #if defined(PETSC_USE_INFO) 1819263d837SHong Zhang if (ai[n] != 0) { 1829263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 1839566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1849566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 1859566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 1869566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 1879263d837SHong Zhang } else { 1889566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 1899263d837SHong Zhang } 1909263d837SHong Zhang #endif 19135233d99SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1924d12350bSJunchao Zhang if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1939566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 1943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1959b48462bSShri Abhyankar } 1969b48462bSShri Abhyankar 1975b5bc046SBarry Smith /* 1985b5bc046SBarry Smith Trouble in factorization, should we dump the original matrix? 1995b5bc046SBarry Smith */ 200d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorDumpMatrix(Mat A) 201d71ae5a4SJacob Faibussowitsch { 202ace3abfcSBarry Smith PetscBool flg = PETSC_FALSE; 2035b5bc046SBarry Smith 2045b5bc046SBarry Smith PetscFunctionBegin; 2059566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 2065b5bc046SBarry Smith if (flg) { 2075b5bc046SBarry Smith PetscViewer viewer; 2085b5bc046SBarry Smith char filename[PETSC_MAX_PATH_LEN]; 2095b5bc046SBarry Smith 2109566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 2119566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 2129566063dSJacob Faibussowitsch PetscCall(MatView(A, viewer)); 2139566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&viewer)); 2145b5bc046SBarry Smith } 2153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2165b5bc046SBarry Smith } 2175b5bc046SBarry Smith 218d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 219d71ae5a4SJacob Faibussowitsch { 220c9c72f8fSHong Zhang Mat C = B; 221c9c72f8fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 222c9c72f8fSHong Zhang IS isrow = b->row, isicol = b->icol; 223c9c72f8fSHong Zhang const PetscInt *r, *ic, *ics; 224bbd65245SShri Abhyankar const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 225bbd65245SShri Abhyankar PetscInt i, j, k, nz, nzL, row, *pj; 226bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp; 227bbd65245SShri Abhyankar MatScalar *rtmp, *pc, multiplier, *pv; 228b65878eeSJunchao Zhang const MatScalar *aa, *v; 229b65878eeSJunchao Zhang MatScalar *ba; 230ace3abfcSBarry Smith PetscBool row_identity, col_identity; 231d90ac83dSHong Zhang FactorShiftCtx sctx; 2324f81c4b7SBarry Smith const PetscInt *ddiag; 233d4ad06f7SHong Zhang PetscReal rs; 234d4ad06f7SHong Zhang MatScalar d; 235d4ad06f7SHong Zhang 236c9c72f8fSHong Zhang PetscFunctionBegin; 237b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 238b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayWrite(B, &ba)); 239d6e5152cSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 2409566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 241d4ad06f7SHong Zhang 242f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 243421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 244d4ad06f7SHong Zhang sctx.shift_top = info->zeropivot; 245d4ad06f7SHong Zhang for (i = 0; i < n; i++) { 246d4ad06f7SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 247421480d9SBarry Smith d = aa[ddiag[i]]; 248d4ad06f7SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 249d4ad06f7SHong Zhang v = aa + ai[i]; 250d4ad06f7SHong Zhang nz = ai[i + 1] - ai[i]; 2512205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 252d4ad06f7SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 253d4ad06f7SHong Zhang } 254d4ad06f7SHong Zhang sctx.shift_top *= 1.1; 255d4ad06f7SHong Zhang sctx.nshift_max = 5; 256d4ad06f7SHong Zhang sctx.shift_lo = 0.; 257d4ad06f7SHong Zhang sctx.shift_hi = 1.; 258d4ad06f7SHong Zhang } 259d4ad06f7SHong Zhang 2609566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 2619566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 2629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 263c9c72f8fSHong Zhang ics = ic; 264c9c72f8fSHong Zhang 265d4ad06f7SHong Zhang do { 26607b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 267c9c72f8fSHong Zhang for (i = 0; i < n; i++) { 268c9c72f8fSHong Zhang /* zero rtmp */ 269c9c72f8fSHong Zhang /* L part */ 270c9c72f8fSHong Zhang nz = bi[i + 1] - bi[i]; 271c9c72f8fSHong Zhang bjtmp = bj + bi[i]; 272c9c72f8fSHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 273c9c72f8fSHong Zhang 274c9c72f8fSHong Zhang /* U part */ 27562fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 27662fbd6afSShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 277813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 278813a1f2bSShri Abhyankar 279813a1f2bSShri Abhyankar /* load in initial (unfactored row) */ 280813a1f2bSShri Abhyankar nz = ai[r[i] + 1] - ai[r[i]]; 281813a1f2bSShri Abhyankar ajtmp = aj + ai[r[i]]; 282813a1f2bSShri Abhyankar v = aa + ai[r[i]]; 283ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 284813a1f2bSShri Abhyankar /* ZeropivotApply() */ 28598a93161SHong Zhang rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 286813a1f2bSShri Abhyankar 287813a1f2bSShri Abhyankar /* elimination */ 288813a1f2bSShri Abhyankar bjtmp = bj + bi[i]; 289813a1f2bSShri Abhyankar row = *bjtmp++; 290813a1f2bSShri Abhyankar nzL = bi[i + 1] - bi[i]; 291f268cba8SShri Abhyankar for (k = 0; k < nzL; k++) { 292813a1f2bSShri Abhyankar pc = rtmp + row; 293813a1f2bSShri Abhyankar if (*pc != 0.0) { 294b65878eeSJunchao Zhang pv = ba + bdiag[row]; 295813a1f2bSShri Abhyankar multiplier = *pc * (*pv); 296813a1f2bSShri Abhyankar *pc = multiplier; 2972205254eSKarl Rupp 29862fbd6afSShri Abhyankar pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 299b65878eeSJunchao Zhang pv = ba + bdiag[row + 1] + 1; 30062fbd6afSShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 3012205254eSKarl Rupp 302813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 3039566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 304813a1f2bSShri Abhyankar } 305f268cba8SShri Abhyankar row = *bjtmp++; 306813a1f2bSShri Abhyankar } 307813a1f2bSShri Abhyankar 308813a1f2bSShri Abhyankar /* finished row so stick it into b->a */ 309813a1f2bSShri Abhyankar rs = 0.0; 310813a1f2bSShri Abhyankar /* L part */ 311b65878eeSJunchao Zhang pv = ba + bi[i]; 312813a1f2bSShri Abhyankar pj = b->j + bi[i]; 313813a1f2bSShri Abhyankar nz = bi[i + 1] - bi[i]; 314813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) { 3159371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 3169371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 317813a1f2bSShri Abhyankar } 318813a1f2bSShri Abhyankar 319813a1f2bSShri Abhyankar /* U part */ 320b65878eeSJunchao Zhang pv = ba + bdiag[i + 1] + 1; 32162fbd6afSShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 32262fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 323813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) { 3249371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 3259371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 326813a1f2bSShri Abhyankar } 327813a1f2bSShri Abhyankar 328813a1f2bSShri Abhyankar sctx.rs = rs; 329813a1f2bSShri Abhyankar sctx.pv = rtmp[i]; 3309566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 33107b50cabSHong Zhang if (sctx.newshift) break; /* break for-loop */ 33207b50cabSHong Zhang rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 333813a1f2bSShri Abhyankar 334a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 335b65878eeSJunchao Zhang pv = ba + bdiag[i]; 336813a1f2bSShri Abhyankar *pv = 1.0 / rtmp[i]; 337813a1f2bSShri Abhyankar 338813a1f2bSShri Abhyankar } /* endof for (i=0; i<n; i++) { */ 339813a1f2bSShri Abhyankar 3408ff23777SHong Zhang /* MatPivotRefine() */ 34107b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 342813a1f2bSShri Abhyankar /* 343813a1f2bSShri Abhyankar * if no shift in this attempt & shifting & started shifting & can refine, 344813a1f2bSShri Abhyankar * then try lower shift 345813a1f2bSShri Abhyankar */ 346813a1f2bSShri Abhyankar sctx.shift_hi = sctx.shift_fraction; 347813a1f2bSShri Abhyankar sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 348813a1f2bSShri Abhyankar sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 34907b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 350813a1f2bSShri Abhyankar sctx.nshift++; 351813a1f2bSShri Abhyankar } 35207b50cabSHong Zhang } while (sctx.newshift); 353813a1f2bSShri Abhyankar 354b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 355b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba)); 356b65878eeSJunchao Zhang 3579566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 3589566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 3599566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 360d3ac4fa3SBarry Smith 3619566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 3629566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 3634d12350bSJunchao Zhang if (b->inode.size_csr) { 364abb87a52SBarry Smith C->ops->solve = MatSolve_SeqAIJ_Inode; 365abb87a52SBarry Smith } else if (row_identity && col_identity) { 36635233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 367813a1f2bSShri Abhyankar } else { 36835233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ; 369813a1f2bSShri Abhyankar } 37035233d99SShri Abhyankar C->ops->solveadd = MatSolveAdd_SeqAIJ; 37135233d99SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 37235233d99SShri Abhyankar C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 37335233d99SShri Abhyankar C->ops->matsolve = MatMatSolve_SeqAIJ; 374a3d9026eSPierre Jolivet C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ; 375813a1f2bSShri Abhyankar C->assembled = PETSC_TRUE; 376813a1f2bSShri Abhyankar C->preallocated = PETSC_TRUE; 3772205254eSKarl Rupp 3789566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 379813a1f2bSShri Abhyankar 38014d2772eSHong Zhang /* MatShiftView(A,info,&sctx) */ 381813a1f2bSShri Abhyankar if (sctx.nshift) { 382f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 3839566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 384f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 3859566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 386f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 3879566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 388813a1f2bSShri Abhyankar } 389813a1f2bSShri Abhyankar } 3903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 391813a1f2bSShri Abhyankar } 392813a1f2bSShri Abhyankar 393ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 394ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 395ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 396ff6a9541SJacob Faibussowitsch 397d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 398d71ae5a4SJacob Faibussowitsch { 399719d5645SBarry Smith Mat C = B; 400416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 40182bf6240SBarry Smith IS isrow = b->row, isicol = b->icol; 4025d0c19d7SBarry Smith const PetscInt *r, *ic, *ics; 403d278edc7SBarry Smith PetscInt nz, row, i, j, n = A->rmap->n, diag; 404d278edc7SBarry Smith const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 405421480d9SBarry Smith const PetscInt *ajtmp, *bjtmp, *ddiag, *pj; 406d278edc7SBarry Smith MatScalar *pv, *rtmp, *pc, multiplier, d; 407b65878eeSJunchao Zhang const MatScalar *v, *aa; 408b65878eeSJunchao Zhang MatScalar *ba; 40933f9893dSHong Zhang PetscReal rs = 0.0; 410d90ac83dSHong Zhang FactorShiftCtx sctx; 411ace3abfcSBarry Smith PetscBool row_identity, col_identity; 412289bc588SBarry Smith 4133a40ed3dSBarry Smith PetscFunctionBegin; 414421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 415421480d9SBarry Smith 416b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 417b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayWrite(B, &ba)); 418504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 4199566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 420ada7143aSSatish Balay 421f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 422421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 4239f7d0b68SHong Zhang sctx.shift_top = info->zeropivot; 4246cc28720Svictorle for (i = 0; i < n; i++) { 4259f95998fSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 426421480d9SBarry Smith d = aa[ddiag[i]]; 4271a890ab1SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 4289f7d0b68SHong Zhang v = aa + ai[i]; 4299f7d0b68SHong Zhang nz = ai[i + 1] - ai[i]; 4302205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 4311a890ab1SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 4326cc28720Svictorle } 433118f5789SBarry Smith sctx.shift_top *= 1.1; 434d2276718SHong Zhang sctx.nshift_max = 5; 435d2276718SHong Zhang sctx.shift_lo = 0.; 436d2276718SHong Zhang sctx.shift_hi = 1.; 437a0a356efSHong Zhang } 438a0a356efSHong Zhang 4399566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 4409566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 4419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 442504a3fadSHong Zhang ics = ic; 443504a3fadSHong Zhang 444085256b3SBarry Smith do { 44507b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 446289bc588SBarry Smith for (i = 0; i < n; i++) { 447b3bf805bSHong Zhang nz = bi[i + 1] - bi[i]; 448b3bf805bSHong Zhang bjtmp = bj + bi[i]; 4499f7d0b68SHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 450289bc588SBarry Smith 451289bc588SBarry Smith /* load in initial (unfactored row) */ 4529f7d0b68SHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 4539f7d0b68SHong Zhang ajtmp = aj + ai[r[i]]; 4549f7d0b68SHong Zhang v = aa + ai[r[i]]; 455ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 456d2276718SHong Zhang rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 457289bc588SBarry Smith 458b3bf805bSHong Zhang row = *bjtmp++; 459f2582111SSatish Balay while (row < i) { 4608c37ef55SBarry Smith pc = rtmp + row; 4618c37ef55SBarry Smith if (*pc != 0.0) { 462421480d9SBarry Smith pv = ba + ddiag[row]; 463421480d9SBarry Smith pj = b->j + ddiag[row] + 1; 46435aab85fSBarry Smith multiplier = *pc / *pv++; 4658c37ef55SBarry Smith *pc = multiplier; 466421480d9SBarry Smith nz = bi[row + 1] - ddiag[row] - 1; 4679f7d0b68SHong Zhang for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 4689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 469289bc588SBarry Smith } 470b3bf805bSHong Zhang row = *bjtmp++; 471289bc588SBarry Smith } 472416022c9SBarry Smith /* finished row so stick it into b->a */ 473b65878eeSJunchao Zhang pv = ba + bi[i]; 474b3bf805bSHong Zhang pj = b->j + bi[i]; 475b3bf805bSHong Zhang nz = bi[i + 1] - bi[i]; 476421480d9SBarry Smith diag = ddiag[i] - bi[i]; 477e57ff13aSBarry Smith rs = 0.0; 478d3d32019SBarry Smith for (j = 0; j < nz; j++) { 4799f7d0b68SHong Zhang pv[j] = rtmp[pj[j]]; 4809f7d0b68SHong Zhang rs += PetscAbsScalar(pv[j]); 481d3d32019SBarry Smith } 482e57ff13aSBarry Smith rs -= PetscAbsScalar(pv[diag]); 483d2276718SHong Zhang 484d2276718SHong Zhang sctx.rs = rs; 485d2276718SHong Zhang sctx.pv = pv[diag]; 4869566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 48707b50cabSHong Zhang if (sctx.newshift) break; 488504a3fadSHong Zhang pv[diag] = sctx.pv; 48935aab85fSBarry Smith } 490d2276718SHong Zhang 49107b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 4926cc28720Svictorle /* 4936c037d1bSvictorle * if no shift in this attempt & shifting & started shifting & can refine, 4946cc28720Svictorle * then try lower shift 4956cc28720Svictorle */ 4960481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 4970481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 4980481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 49907b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 500d2276718SHong Zhang sctx.nshift++; 5016cc28720Svictorle } 50207b50cabSHong Zhang } while (sctx.newshift); 5030f11f9aeSSatish Balay 504a5b23f4aSJose E. Roman /* invert diagonal entries for simpler triangular solves */ 505421480d9SBarry Smith for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]]; 5069566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 5079566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 5089566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 509b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 510b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba)); 511d3ac4fa3SBarry Smith 5129566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 5139566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 5148d8c7c43SBarry Smith if (row_identity && col_identity) { 515ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 5168d8c7c43SBarry Smith } else { 517ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_inplace; 518db4efbfdSBarry Smith } 519ad04f41aSHong Zhang C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 520ad04f41aSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 521ad04f41aSHong Zhang C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 522ad04f41aSHong Zhang C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 523a3d9026eSPierre Jolivet C->ops->matsolvetranspose = NULL; 5242205254eSKarl Rupp 525c456f294SBarry Smith C->assembled = PETSC_TRUE; 5265c9eb25fSBarry Smith C->preallocated = PETSC_TRUE; 5272205254eSKarl Rupp 5289566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 529d2276718SHong Zhang if (sctx.nshift) { 530f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 5319566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 532f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 5339566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 5346cc28720Svictorle } 5350697b6caSHong Zhang } 53657508eceSPierre Jolivet C->ops->solve = MatSolve_SeqAIJ_inplace; 53757508eceSPierre Jolivet C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 5382205254eSKarl Rupp 5399566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode(C)); 5403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 541289bc588SBarry Smith } 5420889b5dcSvictorle 543ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 544ff6a9541SJacob Faibussowitsch 545137fb511SHong Zhang /* 546137fb511SHong Zhang This routine implements inplace ILU(0) with row or/and column permutations. 547137fb511SHong Zhang Input: 548137fb511SHong Zhang A - original matrix 549137fb511SHong Zhang Output; 550137fb511SHong Zhang A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 551137fb511SHong Zhang a->j (col index) is permuted by the inverse of colperm, then sorted 552137fb511SHong Zhang a->a reordered accordingly with a->j 553137fb511SHong Zhang a->diag (ptr to diagonal elements) is updated. 554137fb511SHong Zhang */ 555d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 556d71ae5a4SJacob Faibussowitsch { 557b051339dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 558b051339dSHong Zhang IS isrow = a->row, isicol = a->icol; 5595d0c19d7SBarry Smith const PetscInt *r, *ic, *ics; 5605d0c19d7SBarry Smith PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 5615d0c19d7SBarry Smith PetscInt *ajtmp, nz, row; 562421480d9SBarry Smith PetscInt nbdiag, *pj; 563a77337e4SBarry Smith PetscScalar *rtmp, *pc, multiplier, d; 564504a3fadSHong Zhang MatScalar *pv, *v; 565137fb511SHong Zhang PetscReal rs; 566d90ac83dSHong Zhang FactorShiftCtx sctx; 567b65878eeSJunchao Zhang MatScalar *aa, *vtmp; 568421480d9SBarry Smith PetscInt *diag; 569137fb511SHong Zhang 570137fb511SHong Zhang PetscFunctionBegin; 57108401ef6SPierre Jolivet PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 572504a3fadSHong Zhang 573421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL)); 574b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArray(A, &aa)); 575504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 5769566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 577504a3fadSHong Zhang 578504a3fadSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 579421480d9SBarry Smith const PetscInt *ddiag; 580421480d9SBarry Smith 581421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 582504a3fadSHong Zhang sctx.shift_top = info->zeropivot; 583504a3fadSHong Zhang for (i = 0; i < n; i++) { 584504a3fadSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 585421480d9SBarry Smith d = aa[ddiag[i]]; 586504a3fadSHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 587504a3fadSHong Zhang vtmp = aa + ai[i]; 588504a3fadSHong Zhang nz = ai[i + 1] - ai[i]; 5892205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 590504a3fadSHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 591504a3fadSHong Zhang } 592504a3fadSHong Zhang sctx.shift_top *= 1.1; 593504a3fadSHong Zhang sctx.nshift_max = 5; 594504a3fadSHong Zhang sctx.shift_lo = 0.; 595504a3fadSHong Zhang sctx.shift_hi = 1.; 596504a3fadSHong Zhang } 597504a3fadSHong Zhang 5989566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 5999566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 6009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 6019566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, n + 1)); 602b051339dSHong Zhang ics = ic; 603137fb511SHong Zhang 604504a3fadSHong Zhang #if defined(MV) 60575567043SBarry Smith sctx.shift_top = 0.; 606e60cf9a0SBarry Smith sctx.nshift_max = 0; 60775567043SBarry Smith sctx.shift_lo = 0.; 60875567043SBarry Smith sctx.shift_hi = 0.; 60975567043SBarry Smith sctx.shift_fraction = 0.; 610137fb511SHong Zhang 611f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 61275567043SBarry Smith sctx.shift_top = 0.; 613137fb511SHong Zhang for (i = 0; i < n; i++) { 614137fb511SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 615b65878eeSJunchao Zhang d = aa[diag[i]]; 616137fb511SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 617b65878eeSJunchao Zhang v = aa + ai[i]; 618b051339dSHong Zhang nz = ai[i + 1] - ai[i]; 6192205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 620137fb511SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 621137fb511SHong Zhang } 622137fb511SHong Zhang if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 623137fb511SHong Zhang sctx.shift_top *= 1.1; 624137fb511SHong Zhang sctx.nshift_max = 5; 625137fb511SHong Zhang sctx.shift_lo = 0.; 626137fb511SHong Zhang sctx.shift_hi = 1.; 627137fb511SHong Zhang } 628137fb511SHong Zhang 62975567043SBarry Smith sctx.shift_amount = 0.; 630137fb511SHong Zhang sctx.nshift = 0; 631504a3fadSHong Zhang #endif 632504a3fadSHong Zhang 633137fb511SHong Zhang do { 63407b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 635137fb511SHong Zhang for (i = 0; i < n; i++) { 636b051339dSHong Zhang /* load in initial unfactored row */ 637b051339dSHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 638b051339dSHong Zhang ajtmp = aj + ai[r[i]]; 639b65878eeSJunchao Zhang v = aa + ai[r[i]]; 640137fb511SHong Zhang /* sort permuted ajtmp and values v accordingly */ 641b051339dSHong Zhang for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 6429566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 643137fb511SHong Zhang 644b051339dSHong Zhang diag[r[i]] = ai[r[i]]; 645137fb511SHong Zhang for (j = 0; j < nz; j++) { 646137fb511SHong Zhang rtmp[ajtmp[j]] = v[j]; 647b051339dSHong Zhang if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 648137fb511SHong Zhang } 649137fb511SHong Zhang rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 650137fb511SHong Zhang 651b051339dSHong Zhang row = *ajtmp++; 652137fb511SHong Zhang while (row < i) { 653137fb511SHong Zhang pc = rtmp + row; 654137fb511SHong Zhang if (*pc != 0.0) { 655b65878eeSJunchao Zhang pv = aa + diag[r[row]]; 656b051339dSHong Zhang pj = aj + diag[r[row]] + 1; 657137fb511SHong Zhang 658137fb511SHong Zhang multiplier = *pc / *pv++; 659137fb511SHong Zhang *pc = multiplier; 660b051339dSHong Zhang nz = ai[r[row] + 1] - diag[r[row]] - 1; 661b051339dSHong Zhang for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 6629566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 663137fb511SHong Zhang } 664b051339dSHong Zhang row = *ajtmp++; 665137fb511SHong Zhang } 666b65878eeSJunchao Zhang /* finished row so overwrite it onto aa */ 667b65878eeSJunchao Zhang pv = aa + ai[r[i]]; 668b051339dSHong Zhang pj = aj + ai[r[i]]; 669b051339dSHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 670b051339dSHong Zhang nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 671137fb511SHong Zhang 672137fb511SHong Zhang rs = 0.0; 673137fb511SHong Zhang for (j = 0; j < nz; j++) { 674b051339dSHong Zhang pv[j] = rtmp[pj[j]]; 675b051339dSHong Zhang if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 676137fb511SHong Zhang } 677137fb511SHong Zhang 678137fb511SHong Zhang sctx.rs = rs; 679b051339dSHong Zhang sctx.pv = pv[nbdiag]; 6809566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 68107b50cabSHong Zhang if (sctx.newshift) break; 682504a3fadSHong Zhang pv[nbdiag] = sctx.pv; 683137fb511SHong Zhang } 684137fb511SHong Zhang 68507b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 686137fb511SHong Zhang /* 687137fb511SHong Zhang * if no shift in this attempt & shifting & started shifting & can refine, 688137fb511SHong Zhang * then try lower shift 689137fb511SHong Zhang */ 6900481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 6910481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 6920481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 69307b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 694137fb511SHong Zhang sctx.nshift++; 695137fb511SHong Zhang } 69607b50cabSHong Zhang } while (sctx.newshift); 697137fb511SHong Zhang 698a5b23f4aSJose E. Roman /* invert diagonal entries for simpler triangular solves */ 699b65878eeSJunchao Zhang for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]]; 700137fb511SHong Zhang 701b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArray(A, &aa)); 7029566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 7039566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 7049566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 7052205254eSKarl Rupp 706b051339dSHong Zhang A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 707ad04f41aSHong Zhang A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 708ad04f41aSHong Zhang A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 709ad04f41aSHong Zhang A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 7102205254eSKarl Rupp 711b051339dSHong Zhang A->assembled = PETSC_TRUE; 7125c9eb25fSBarry Smith A->preallocated = PETSC_TRUE; 7132205254eSKarl Rupp 7149566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(A->cmap->n)); 715137fb511SHong Zhang if (sctx.nshift) { 716f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 7179566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 718f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 7199566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 720137fb511SHong Zhang } 721137fb511SHong Zhang } 7223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 723137fb511SHong Zhang } 724137fb511SHong Zhang 725d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 726d71ae5a4SJacob Faibussowitsch { 727416022c9SBarry Smith Mat C; 728416022c9SBarry Smith 7293a40ed3dSBarry Smith PetscFunctionBegin; 7309566063dSJacob Faibussowitsch PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 7319566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 7329566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(C, A, info)); 7332205254eSKarl Rupp 734db4efbfdSBarry Smith A->ops->solve = C->ops->solve; 735db4efbfdSBarry Smith A->ops->solvetranspose = C->ops->solvetranspose; 7362205254eSKarl Rupp 7379566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 7383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 739da3a660dSBarry Smith } 7401d098868SHong Zhang 741d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 742d71ae5a4SJacob Faibussowitsch { 743416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 744416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 7455d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 7465d0c19d7SBarry Smith PetscInt nz; 7475d0c19d7SBarry Smith const PetscInt *rout, *cout, *r, *c; 748dd6ea824SBarry Smith PetscScalar *x, *tmp, *tmps, sum; 749d9fead3dSBarry Smith const PetscScalar *b; 750b65878eeSJunchao Zhang const MatScalar *aa, *v; 751421480d9SBarry Smith const PetscInt *adiag; 7528c37ef55SBarry Smith 7533a40ed3dSBarry Smith PetscFunctionBegin; 7543ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 75591bf9adeSBarry Smith 756421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 757b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 7589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 7599566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 760416022c9SBarry Smith tmp = a->solve_work; 7618c37ef55SBarry Smith 7629371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 7639371c9d4SSatish Balay r = rout; 7649371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 7659371c9d4SSatish Balay c = cout + (n - 1); 7668c37ef55SBarry Smith 7678c37ef55SBarry Smith /* forward solve the lower triangular */ 7688c37ef55SBarry Smith tmp[0] = b[*r++]; 769010a20caSHong Zhang tmps = tmp; 7708c37ef55SBarry Smith for (i = 1; i < n; i++) { 771010a20caSHong Zhang v = aa + ai[i]; 772010a20caSHong Zhang vi = aj + ai[i]; 773421480d9SBarry Smith nz = adiag[i] - ai[i]; 77453e7425aSBarry Smith sum = b[*r++]; 775003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 7768c37ef55SBarry Smith tmp[i] = sum; 7778c37ef55SBarry Smith } 7788c37ef55SBarry Smith 7798c37ef55SBarry Smith /* backward solve the upper triangular */ 7808c37ef55SBarry Smith for (i = n - 1; i >= 0; i--) { 781421480d9SBarry Smith v = aa + adiag[i] + 1; 782421480d9SBarry Smith vi = aj + adiag[i] + 1; 783421480d9SBarry Smith nz = ai[i + 1] - adiag[i] - 1; 7848c37ef55SBarry Smith sum = tmp[i]; 785003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 786421480d9SBarry Smith x[*c--] = tmp[i] = sum * aa[adiag[i]]; 7878c37ef55SBarry Smith } 788b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 7899566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 7909566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 7919566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 7929566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 7939566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 7943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7958c37ef55SBarry Smith } 796026e39d0SSatish Balay 797ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 798d71ae5a4SJacob Faibussowitsch { 7992bebee5dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8002bebee5dSHong Zhang IS iscol = a->col, isrow = a->row; 8015d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 802910cf402Sprj- PetscInt nz, neq, ldb, ldx; 8035d0c19d7SBarry Smith const PetscInt *rout, *cout, *r, *c; 804910cf402Sprj- PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 805b65878eeSJunchao Zhang const PetscScalar *b, *aa, *v; 806910cf402Sprj- PetscBool isdense; 807421480d9SBarry Smith const PetscInt *adiag; 8082bebee5dSHong Zhang 8092bebee5dSHong Zhang PetscFunctionBegin; 8103ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 8119566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 81228b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 813f9fb9879SHong Zhang if (X != B) { 8149566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 81528b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 816f9fb9879SHong Zhang } 817421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 818b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 8199566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 8209566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &ldb)); 8219566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 8229566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(X, &ldx)); 8239371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 8249371c9d4SSatish Balay r = rout; 8259371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 8269371c9d4SSatish Balay c = cout; 827a36b22ccSSatish Balay for (neq = 0; neq < B->cmap->n; neq++) { 8282bebee5dSHong Zhang /* forward solve the lower triangular */ 8292bebee5dSHong Zhang tmp[0] = b[r[0]]; 8302bebee5dSHong Zhang tmps = tmp; 8312bebee5dSHong Zhang for (i = 1; i < n; i++) { 8322bebee5dSHong Zhang v = aa + ai[i]; 8332bebee5dSHong Zhang vi = aj + ai[i]; 834421480d9SBarry Smith nz = adiag[i] - ai[i]; 8352bebee5dSHong Zhang sum = b[r[i]]; 836003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 8372bebee5dSHong Zhang tmp[i] = sum; 8382bebee5dSHong Zhang } 8392bebee5dSHong Zhang /* backward solve the upper triangular */ 8402bebee5dSHong Zhang for (i = n - 1; i >= 0; i--) { 841421480d9SBarry Smith v = aa + adiag[i] + 1; 842421480d9SBarry Smith vi = aj + adiag[i] + 1; 843421480d9SBarry Smith nz = ai[i + 1] - adiag[i] - 1; 8442bebee5dSHong Zhang sum = tmp[i]; 845003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 846421480d9SBarry Smith x[c[i]] = tmp[i] = sum * aa[adiag[i]]; 8472bebee5dSHong Zhang } 848910cf402Sprj- b += ldb; 849910cf402Sprj- x += ldx; 8502bebee5dSHong Zhang } 851b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 8529566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 8539566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 8549566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 8559566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 8569566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 8573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8582bebee5dSHong Zhang } 8592bebee5dSHong Zhang 860d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 861d71ae5a4SJacob Faibussowitsch { 8629bd0847aSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8639bd0847aSShri Abhyankar IS iscol = a->col, isrow = a->row; 864421480d9SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 865421480d9SBarry Smith const PetscInt *adiag; 866910cf402Sprj- PetscInt nz, neq, ldb, ldx; 8679bd0847aSShri Abhyankar const PetscInt *rout, *cout, *r, *c; 868910cf402Sprj- PetscScalar *x, *tmp = a->solve_work, sum; 869b65878eeSJunchao Zhang const PetscScalar *b, *aa, *v; 870910cf402Sprj- PetscBool isdense; 8719bd0847aSShri Abhyankar 8729bd0847aSShri Abhyankar PetscFunctionBegin; 8733ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 8749566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 87528b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 876f9fb9879SHong Zhang if (X != B) { 8779566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 87828b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 879f9fb9879SHong Zhang } 880421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 881b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 8829566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 8839566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &ldb)); 8849566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 8859566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(X, &ldx)); 8869371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 8879371c9d4SSatish Balay r = rout; 8889371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 8899371c9d4SSatish Balay c = cout; 8909bd0847aSShri Abhyankar for (neq = 0; neq < B->cmap->n; neq++) { 8919bd0847aSShri Abhyankar /* forward solve the lower triangular */ 8929bd0847aSShri Abhyankar tmp[0] = b[r[0]]; 8939bd0847aSShri Abhyankar v = aa; 8949bd0847aSShri Abhyankar vi = aj; 8959bd0847aSShri Abhyankar for (i = 1; i < n; i++) { 8969bd0847aSShri Abhyankar nz = ai[i + 1] - ai[i]; 8979bd0847aSShri Abhyankar sum = b[r[i]]; 8989bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 8999bd0847aSShri Abhyankar tmp[i] = sum; 9009371c9d4SSatish Balay v += nz; 9019371c9d4SSatish Balay vi += nz; 9029bd0847aSShri Abhyankar } 9039bd0847aSShri Abhyankar /* backward solve the upper triangular */ 9049bd0847aSShri Abhyankar for (i = n - 1; i >= 0; i--) { 9059bd0847aSShri Abhyankar v = aa + adiag[i + 1] + 1; 9069bd0847aSShri Abhyankar vi = aj + adiag[i + 1] + 1; 9079bd0847aSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 9089bd0847aSShri Abhyankar sum = tmp[i]; 9099bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 9109bd0847aSShri Abhyankar x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 9119bd0847aSShri Abhyankar } 912910cf402Sprj- b += ldb; 913910cf402Sprj- x += ldx; 9149bd0847aSShri Abhyankar } 915b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 9169566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 9179566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 9189566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 9199566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 9209566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 9213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9229bd0847aSShri Abhyankar } 9239bd0847aSShri Abhyankar 924a3d9026eSPierre Jolivet PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X) 925a3d9026eSPierre Jolivet { 926a3d9026eSPierre Jolivet Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 927a3d9026eSPierre Jolivet IS iscol = a->col, isrow = a->row; 928421480d9SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j; 929421480d9SBarry Smith const PetscInt *adiag = a->diag; 930a3d9026eSPierre Jolivet PetscInt nz, neq, ldb, ldx; 931a3d9026eSPierre Jolivet const PetscInt *rout, *cout, *r, *c; 932a3d9026eSPierre Jolivet PetscScalar *x, *tmp = a->solve_work, s1; 933b65878eeSJunchao Zhang const PetscScalar *b, *aa, *v; 934a3d9026eSPierre Jolivet PetscBool isdense; 935a3d9026eSPierre Jolivet 936a3d9026eSPierre Jolivet PetscFunctionBegin; 937a3d9026eSPierre Jolivet if (!n) PetscFunctionReturn(PETSC_SUCCESS); 938a3d9026eSPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 939a3d9026eSPierre Jolivet PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 940a3d9026eSPierre Jolivet if (X != B) { 941a3d9026eSPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 942a3d9026eSPierre Jolivet PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 943a3d9026eSPierre Jolivet } 944421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 945b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 946a3d9026eSPierre Jolivet PetscCall(MatDenseGetArrayRead(B, &b)); 947a3d9026eSPierre Jolivet PetscCall(MatDenseGetLDA(B, &ldb)); 948a3d9026eSPierre Jolivet PetscCall(MatDenseGetArray(X, &x)); 949a3d9026eSPierre Jolivet PetscCall(MatDenseGetLDA(X, &ldx)); 950a3d9026eSPierre Jolivet PetscCall(ISGetIndices(isrow, &rout)); 951a3d9026eSPierre Jolivet r = rout; 952a3d9026eSPierre Jolivet PetscCall(ISGetIndices(iscol, &cout)); 953a3d9026eSPierre Jolivet c = cout; 954a3d9026eSPierre Jolivet for (neq = 0; neq < B->cmap->n; neq++) { 955a3d9026eSPierre Jolivet /* copy the b into temp work space according to permutation */ 956a3d9026eSPierre Jolivet for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 957a3d9026eSPierre Jolivet 958a3d9026eSPierre Jolivet /* forward solve the U^T */ 959a3d9026eSPierre Jolivet for (i = 0; i < n; i++) { 960a3d9026eSPierre Jolivet v = aa + adiag[i + 1] + 1; 961a3d9026eSPierre Jolivet vi = aj + adiag[i + 1] + 1; 962a3d9026eSPierre Jolivet nz = adiag[i] - adiag[i + 1] - 1; 963a3d9026eSPierre Jolivet s1 = tmp[i]; 964a3d9026eSPierre Jolivet s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 965a3d9026eSPierre Jolivet for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 966a3d9026eSPierre Jolivet tmp[i] = s1; 967a3d9026eSPierre Jolivet } 968a3d9026eSPierre Jolivet 969a3d9026eSPierre Jolivet /* backward solve the L^T */ 970a3d9026eSPierre Jolivet for (i = n - 1; i >= 0; i--) { 971a3d9026eSPierre Jolivet v = aa + ai[i]; 972a3d9026eSPierre Jolivet vi = aj + ai[i]; 973a3d9026eSPierre Jolivet nz = ai[i + 1] - ai[i]; 974a3d9026eSPierre Jolivet s1 = tmp[i]; 975a3d9026eSPierre Jolivet for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 976a3d9026eSPierre Jolivet } 977a3d9026eSPierre Jolivet 978a3d9026eSPierre Jolivet /* copy tmp into x according to permutation */ 979a3d9026eSPierre Jolivet for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 980a3d9026eSPierre Jolivet b += ldb; 981a3d9026eSPierre Jolivet x += ldx; 982a3d9026eSPierre Jolivet } 983b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 984a3d9026eSPierre Jolivet PetscCall(ISRestoreIndices(isrow, &rout)); 985a3d9026eSPierre Jolivet PetscCall(ISRestoreIndices(iscol, &cout)); 986a3d9026eSPierre Jolivet PetscCall(MatDenseRestoreArrayRead(B, &b)); 987a3d9026eSPierre Jolivet PetscCall(MatDenseRestoreArray(X, &x)); 988a3d9026eSPierre Jolivet PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 989a3d9026eSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 990a3d9026eSPierre Jolivet } 991a3d9026eSPierre Jolivet 992ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 993d71ae5a4SJacob Faibussowitsch { 994137fb511SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 995137fb511SHong Zhang IS iscol = a->col, isrow = a->row; 996421480d9SBarry Smith const PetscInt *r, *c, *rout, *cout, *adiag; 9975d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 9985d0c19d7SBarry Smith PetscInt nz, row; 999fdc842d1SBarry Smith PetscScalar *x, *tmp, *tmps, sum; 1000fdc842d1SBarry Smith const PetscScalar *b; 1001b65878eeSJunchao Zhang const MatScalar *aa, *v; 1002137fb511SHong Zhang 1003137fb511SHong Zhang PetscFunctionBegin; 10043ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1005137fb511SHong Zhang 1006421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1007b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 10089566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 10099566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1010137fb511SHong Zhang tmp = a->solve_work; 1011137fb511SHong Zhang 10129371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 10139371c9d4SSatish Balay r = rout; 10149371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 10159371c9d4SSatish Balay c = cout + (n - 1); 1016137fb511SHong Zhang 1017137fb511SHong Zhang /* forward solve the lower triangular */ 1018137fb511SHong Zhang tmp[0] = b[*r++]; 1019137fb511SHong Zhang tmps = tmp; 1020137fb511SHong Zhang for (row = 1; row < n; row++) { 1021137fb511SHong Zhang i = rout[row]; /* permuted row */ 1022137fb511SHong Zhang v = aa + ai[i]; 1023137fb511SHong Zhang vi = aj + ai[i]; 1024421480d9SBarry Smith nz = adiag[i] - ai[i]; 1025137fb511SHong Zhang sum = b[*r++]; 1026003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1027137fb511SHong Zhang tmp[row] = sum; 1028137fb511SHong Zhang } 1029137fb511SHong Zhang 1030137fb511SHong Zhang /* backward solve the upper triangular */ 1031137fb511SHong Zhang for (row = n - 1; row >= 0; row--) { 1032137fb511SHong Zhang i = rout[row]; /* permuted row */ 1033421480d9SBarry Smith v = aa + adiag[i] + 1; 1034421480d9SBarry Smith vi = aj + adiag[i] + 1; 1035421480d9SBarry Smith nz = ai[i + 1] - adiag[i] - 1; 1036137fb511SHong Zhang sum = tmp[row]; 1037003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1038421480d9SBarry Smith x[*c--] = tmp[row] = sum * aa[adiag[i]]; 1039137fb511SHong Zhang } 1040b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 10419566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 10429566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 10439566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 10449566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 10459566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 10463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1047137fb511SHong Zhang } 1048137fb511SHong Zhang 1049c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1050ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1051d71ae5a4SJacob Faibussowitsch { 1052930ae53cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1053d0f46423SBarry Smith PetscInt n = A->rmap->n; 1054421480d9SBarry Smith const PetscInt *ai = a->i, *aj = a->j, *adiag; 1055356650c2SBarry Smith PetscScalar *x; 1056356650c2SBarry Smith const PetscScalar *b; 1057b65878eeSJunchao Zhang const MatScalar *aa; 1058aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1059356650c2SBarry Smith PetscInt adiag_i, i, nz, ai_i; 1060b377110cSBarry Smith const PetscInt *vi; 1061dd6ea824SBarry Smith const MatScalar *v; 1062dd6ea824SBarry Smith PetscScalar sum; 1063d85afc42SSatish Balay #endif 1064930ae53cSBarry Smith 10653a40ed3dSBarry Smith PetscFunctionBegin; 10663ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1067421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1068b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 10699566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 10709566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1071930ae53cSBarry Smith 1072aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 10731c4feecaSSatish Balay fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 10741c4feecaSSatish Balay #else 1075930ae53cSBarry Smith /* forward solve the lower triangular */ 1076930ae53cSBarry Smith x[0] = b[0]; 1077930ae53cSBarry Smith for (i = 1; i < n; i++) { 1078930ae53cSBarry Smith ai_i = ai[i]; 1079930ae53cSBarry Smith v = aa + ai_i; 1080930ae53cSBarry Smith vi = aj + ai_i; 1081930ae53cSBarry Smith nz = adiag[i] - ai_i; 1082930ae53cSBarry Smith sum = b[i]; 1083003131ecSBarry Smith PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1084930ae53cSBarry Smith x[i] = sum; 1085930ae53cSBarry Smith } 1086930ae53cSBarry Smith 1087930ae53cSBarry Smith /* backward solve the upper triangular */ 1088930ae53cSBarry Smith for (i = n - 1; i >= 0; i--) { 1089930ae53cSBarry Smith adiag_i = adiag[i]; 1090d708749eSBarry Smith v = aa + adiag_i + 1; 1091d708749eSBarry Smith vi = aj + adiag_i + 1; 1092930ae53cSBarry Smith nz = ai[i + 1] - adiag_i - 1; 1093930ae53cSBarry Smith sum = x[i]; 1094003131ecSBarry Smith PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1095930ae53cSBarry Smith x[i] = sum * aa[adiag_i]; 1096930ae53cSBarry Smith } 10971c4feecaSSatish Balay #endif 10989566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1099b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 11009566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 11019566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 11023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1103930ae53cSBarry Smith } 1104930ae53cSBarry Smith 1105ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1106d71ae5a4SJacob Faibussowitsch { 1107416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1108416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 110904fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 11105d0c19d7SBarry Smith PetscInt nz; 1111421480d9SBarry Smith const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag; 111204fbf559SBarry Smith PetscScalar *x, *tmp, sum; 111304fbf559SBarry Smith const PetscScalar *b; 1114b65878eeSJunchao Zhang const MatScalar *aa, *v; 1115da3a660dSBarry Smith 11163a40ed3dSBarry Smith PetscFunctionBegin; 11179566063dSJacob Faibussowitsch if (yy != xx) PetscCall(VecCopy(yy, xx)); 1118da3a660dSBarry Smith 1119421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1120b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 11219566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 11229566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1123416022c9SBarry Smith tmp = a->solve_work; 1124da3a660dSBarry Smith 11259371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 11269371c9d4SSatish Balay r = rout; 11279371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 11289371c9d4SSatish Balay c = cout + (n - 1); 1129da3a660dSBarry Smith 1130da3a660dSBarry Smith /* forward solve the lower triangular */ 1131da3a660dSBarry Smith tmp[0] = b[*r++]; 1132da3a660dSBarry Smith for (i = 1; i < n; i++) { 1133010a20caSHong Zhang v = aa + ai[i]; 1134010a20caSHong Zhang vi = aj + ai[i]; 1135421480d9SBarry Smith nz = adiag[i] - ai[i]; 1136da3a660dSBarry Smith sum = b[*r++]; 113704fbf559SBarry Smith for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1138da3a660dSBarry Smith tmp[i] = sum; 1139da3a660dSBarry Smith } 1140da3a660dSBarry Smith 1141da3a660dSBarry Smith /* backward solve the upper triangular */ 1142da3a660dSBarry Smith for (i = n - 1; i >= 0; i--) { 1143421480d9SBarry Smith v = aa + adiag[i] + 1; 1144421480d9SBarry Smith vi = aj + adiag[i] + 1; 1145421480d9SBarry Smith nz = ai[i + 1] - adiag[i] - 1; 1146da3a660dSBarry Smith sum = tmp[i]; 114704fbf559SBarry Smith for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1148421480d9SBarry Smith tmp[i] = sum * aa[adiag[i]]; 1149da3a660dSBarry Smith x[*c--] += tmp[i]; 1150da3a660dSBarry Smith } 1151da3a660dSBarry Smith 1152b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 11539566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 11549566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 11559566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 11569566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 11579566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 11583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1159da3a660dSBarry Smith } 116004fbf559SBarry Smith 1161d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1162d71ae5a4SJacob Faibussowitsch { 11633c0119dfSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 11643c0119dfSShri Abhyankar IS iscol = a->col, isrow = a->row; 11653c0119dfSShri Abhyankar PetscInt i, n = A->rmap->n, j; 11663c0119dfSShri Abhyankar PetscInt nz; 1167421480d9SBarry Smith const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag; 11683c0119dfSShri Abhyankar PetscScalar *x, *tmp, sum; 11693c0119dfSShri Abhyankar const PetscScalar *b; 1170b65878eeSJunchao Zhang const MatScalar *aa, *v; 11713c0119dfSShri Abhyankar 11723c0119dfSShri Abhyankar PetscFunctionBegin; 11739566063dSJacob Faibussowitsch if (yy != xx) PetscCall(VecCopy(yy, xx)); 11743c0119dfSShri Abhyankar 1175421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1176b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 11779566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 11789566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 11793c0119dfSShri Abhyankar tmp = a->solve_work; 11803c0119dfSShri Abhyankar 11819371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 11829371c9d4SSatish Balay r = rout; 11839371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 11849371c9d4SSatish Balay c = cout; 11853c0119dfSShri Abhyankar 11863c0119dfSShri Abhyankar /* forward solve the lower triangular */ 11873c0119dfSShri Abhyankar tmp[0] = b[r[0]]; 11883c0119dfSShri Abhyankar v = aa; 11893c0119dfSShri Abhyankar vi = aj; 11903c0119dfSShri Abhyankar for (i = 1; i < n; i++) { 11913c0119dfSShri Abhyankar nz = ai[i + 1] - ai[i]; 11923c0119dfSShri Abhyankar sum = b[r[i]]; 11933c0119dfSShri Abhyankar for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 11943c0119dfSShri Abhyankar tmp[i] = sum; 11952205254eSKarl Rupp v += nz; 11962205254eSKarl Rupp vi += nz; 11973c0119dfSShri Abhyankar } 11983c0119dfSShri Abhyankar 11993c0119dfSShri Abhyankar /* backward solve the upper triangular */ 12003c0119dfSShri Abhyankar v = aa + adiag[n - 1]; 12013c0119dfSShri Abhyankar vi = aj + adiag[n - 1]; 12023c0119dfSShri Abhyankar for (i = n - 1; i >= 0; i--) { 12033c0119dfSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 12043c0119dfSShri Abhyankar sum = tmp[i]; 12053c0119dfSShri Abhyankar for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 12063c0119dfSShri Abhyankar tmp[i] = sum * v[nz]; 12073c0119dfSShri Abhyankar x[c[i]] += tmp[i]; 12089371c9d4SSatish Balay v += nz + 1; 12099371c9d4SSatish Balay vi += nz + 1; 12103c0119dfSShri Abhyankar } 12113c0119dfSShri Abhyankar 1212b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 12139566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 12149566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 12159566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12169566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12179566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 12183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 12193c0119dfSShri Abhyankar } 12203c0119dfSShri Abhyankar 1221d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1222d71ae5a4SJacob Faibussowitsch { 1223416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 12242235e667SBarry Smith IS iscol = a->col, isrow = a->row; 122504fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 122604fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 122704fbf559SBarry Smith PetscInt nz; 122804fbf559SBarry Smith PetscScalar *x, *tmp, s1; 1229b65878eeSJunchao Zhang const MatScalar *aa, *v; 123004fbf559SBarry Smith const PetscScalar *b; 1231da3a660dSBarry Smith 12323a40ed3dSBarry Smith PetscFunctionBegin; 1233b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 12349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12359566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1236416022c9SBarry Smith tmp = a->solve_work; 1237da3a660dSBarry Smith 12389371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 12399371c9d4SSatish Balay r = rout; 12409371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 12419371c9d4SSatish Balay c = cout; 1242da3a660dSBarry Smith 1243da3a660dSBarry Smith /* copy the b into temp work space according to permutation */ 12442235e667SBarry Smith for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1245da3a660dSBarry Smith 1246da3a660dSBarry Smith /* forward solve the U^T */ 1247da3a660dSBarry Smith for (i = 0; i < n; i++) { 1248010a20caSHong Zhang v = aa + diag[i]; 1249010a20caSHong Zhang vi = aj + diag[i] + 1; 1250f1af5d2fSBarry Smith nz = ai[i + 1] - diag[i] - 1; 1251c38d4ed2SBarry Smith s1 = tmp[i]; 1252ef66eb69SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 125304fbf559SBarry Smith for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1254c38d4ed2SBarry Smith tmp[i] = s1; 1255da3a660dSBarry Smith } 1256da3a660dSBarry Smith 1257da3a660dSBarry Smith /* backward solve the L^T */ 1258da3a660dSBarry Smith for (i = n - 1; i >= 0; i--) { 1259010a20caSHong Zhang v = aa + diag[i] - 1; 1260010a20caSHong Zhang vi = aj + diag[i] - 1; 1261f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 1262f1af5d2fSBarry Smith s1 = tmp[i]; 126304fbf559SBarry Smith for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1264da3a660dSBarry Smith } 1265da3a660dSBarry Smith 1266da3a660dSBarry Smith /* copy tmp into x according to permutation */ 1267591294e4SBarry Smith for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1268da3a660dSBarry Smith 12699566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 12709566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1271b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 12729566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12739566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 1274da3a660dSBarry Smith 12759566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 12763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1277da3a660dSBarry Smith } 1278da3a660dSBarry Smith 1279d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1280d71ae5a4SJacob Faibussowitsch { 1281d1fa9404SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1282d1fa9404SShri Abhyankar IS iscol = a->col, isrow = a->row; 1283421480d9SBarry Smith const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi; 1284d1fa9404SShri Abhyankar PetscInt i, n = A->rmap->n, j; 1285d1fa9404SShri Abhyankar PetscInt nz; 1286d1fa9404SShri Abhyankar PetscScalar *x, *tmp, s1; 1287b65878eeSJunchao Zhang const MatScalar *aa, *v; 1288d1fa9404SShri Abhyankar const PetscScalar *b; 1289d1fa9404SShri Abhyankar 1290d1fa9404SShri Abhyankar PetscFunctionBegin; 1291421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1292b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 12939566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12949566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1295d1fa9404SShri Abhyankar tmp = a->solve_work; 1296d1fa9404SShri Abhyankar 12979371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 12989371c9d4SSatish Balay r = rout; 12999371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 13009371c9d4SSatish Balay c = cout; 1301d1fa9404SShri Abhyankar 1302d1fa9404SShri Abhyankar /* copy the b into temp work space according to permutation */ 1303d1fa9404SShri Abhyankar for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1304d1fa9404SShri Abhyankar 1305d1fa9404SShri Abhyankar /* forward solve the U^T */ 1306d1fa9404SShri Abhyankar for (i = 0; i < n; i++) { 1307d1fa9404SShri Abhyankar v = aa + adiag[i + 1] + 1; 1308d1fa9404SShri Abhyankar vi = aj + adiag[i + 1] + 1; 1309d1fa9404SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 1310d1fa9404SShri Abhyankar s1 = tmp[i]; 1311d1fa9404SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1312d1fa9404SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1313d1fa9404SShri Abhyankar tmp[i] = s1; 1314d1fa9404SShri Abhyankar } 1315d1fa9404SShri Abhyankar 1316d1fa9404SShri Abhyankar /* backward solve the L^T */ 1317d1fa9404SShri Abhyankar for (i = n - 1; i >= 0; i--) { 1318d1fa9404SShri Abhyankar v = aa + ai[i]; 1319d1fa9404SShri Abhyankar vi = aj + ai[i]; 1320d1fa9404SShri Abhyankar nz = ai[i + 1] - ai[i]; 1321d1fa9404SShri Abhyankar s1 = tmp[i]; 1322d1fa9404SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1323d1fa9404SShri Abhyankar } 1324d1fa9404SShri Abhyankar 1325d1fa9404SShri Abhyankar /* copy tmp into x according to permutation */ 1326d1fa9404SShri Abhyankar for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1327d1fa9404SShri Abhyankar 13289566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 13299566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1330b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 13319566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 13329566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 1333d1fa9404SShri Abhyankar 13349566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 13353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1336d1fa9404SShri Abhyankar } 1337d1fa9404SShri Abhyankar 1338d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1339d71ae5a4SJacob Faibussowitsch { 1340416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 13412235e667SBarry Smith IS iscol = a->col, isrow = a->row; 134204fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 134304fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 134404fbf559SBarry Smith PetscInt nz; 134504fbf559SBarry Smith PetscScalar *x, *tmp, s1; 1346b65878eeSJunchao Zhang const MatScalar *aa, *v; 134704fbf559SBarry Smith const PetscScalar *b; 13486abc6512SBarry Smith 13493a40ed3dSBarry Smith PetscFunctionBegin; 13509566063dSJacob Faibussowitsch if (zz != xx) PetscCall(VecCopy(zz, xx)); 1351b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 13529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 13539566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1354416022c9SBarry Smith tmp = a->solve_work; 13556abc6512SBarry Smith 13569371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 13579371c9d4SSatish Balay r = rout; 13589371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 13599371c9d4SSatish Balay c = cout; 13606abc6512SBarry Smith 13616abc6512SBarry Smith /* copy the b into temp work space according to permutation */ 13622235e667SBarry Smith for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 13636abc6512SBarry Smith 13646abc6512SBarry Smith /* forward solve the U^T */ 13656abc6512SBarry Smith for (i = 0; i < n; i++) { 1366010a20caSHong Zhang v = aa + diag[i]; 1367010a20caSHong Zhang vi = aj + diag[i] + 1; 1368f1af5d2fSBarry Smith nz = ai[i + 1] - diag[i] - 1; 136904fbf559SBarry Smith s1 = tmp[i]; 137004fbf559SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 137104fbf559SBarry Smith for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 137204fbf559SBarry Smith tmp[i] = s1; 13736abc6512SBarry Smith } 13746abc6512SBarry Smith 13756abc6512SBarry Smith /* backward solve the L^T */ 13766abc6512SBarry Smith for (i = n - 1; i >= 0; i--) { 1377010a20caSHong Zhang v = aa + diag[i] - 1; 1378010a20caSHong Zhang vi = aj + diag[i] - 1; 1379f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 138004fbf559SBarry Smith s1 = tmp[i]; 138104fbf559SBarry Smith for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 13826abc6512SBarry Smith } 13836abc6512SBarry Smith 13846abc6512SBarry Smith /* copy tmp into x according to permutation */ 13856abc6512SBarry Smith for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 13866abc6512SBarry Smith 13879566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 13889566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1389b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 13909566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 13919566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 13926abc6512SBarry Smith 13939566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 13943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1395da3a660dSBarry Smith } 139604fbf559SBarry Smith 1397d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1398d71ae5a4SJacob Faibussowitsch { 1399533e6011SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1400533e6011SShri Abhyankar IS iscol = a->col, isrow = a->row; 1401421480d9SBarry Smith const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi; 1402533e6011SShri Abhyankar PetscInt i, n = A->rmap->n, j; 1403533e6011SShri Abhyankar PetscInt nz; 1404533e6011SShri Abhyankar PetscScalar *x, *tmp, s1; 1405b65878eeSJunchao Zhang const MatScalar *aa, *v; 1406533e6011SShri Abhyankar const PetscScalar *b; 1407533e6011SShri Abhyankar 1408533e6011SShri Abhyankar PetscFunctionBegin; 14099566063dSJacob Faibussowitsch if (zz != xx) PetscCall(VecCopy(zz, xx)); 1410421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1411b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 14129566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 14139566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1414533e6011SShri Abhyankar tmp = a->solve_work; 1415533e6011SShri Abhyankar 14169371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 14179371c9d4SSatish Balay r = rout; 14189371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 14199371c9d4SSatish Balay c = cout; 1420533e6011SShri Abhyankar 1421533e6011SShri Abhyankar /* copy the b into temp work space according to permutation */ 1422533e6011SShri Abhyankar for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1423533e6011SShri Abhyankar 1424533e6011SShri Abhyankar /* forward solve the U^T */ 1425533e6011SShri Abhyankar for (i = 0; i < n; i++) { 1426533e6011SShri Abhyankar v = aa + adiag[i + 1] + 1; 1427533e6011SShri Abhyankar vi = aj + adiag[i + 1] + 1; 1428533e6011SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 1429533e6011SShri Abhyankar s1 = tmp[i]; 1430533e6011SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1431533e6011SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1432533e6011SShri Abhyankar tmp[i] = s1; 1433533e6011SShri Abhyankar } 1434533e6011SShri Abhyankar 1435533e6011SShri Abhyankar /* backward solve the L^T */ 1436533e6011SShri Abhyankar for (i = n - 1; i >= 0; i--) { 1437533e6011SShri Abhyankar v = aa + ai[i]; 1438533e6011SShri Abhyankar vi = aj + ai[i]; 1439533e6011SShri Abhyankar nz = ai[i + 1] - ai[i]; 1440533e6011SShri Abhyankar s1 = tmp[i]; 1441c5e3b2a3SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1442533e6011SShri Abhyankar } 1443533e6011SShri Abhyankar 1444533e6011SShri Abhyankar /* copy tmp into x according to permutation */ 1445533e6011SShri Abhyankar for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1446533e6011SShri Abhyankar 14479566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 14489566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1449b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 14509566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 14519566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 1452533e6011SShri Abhyankar 14539566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 14543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1455533e6011SShri Abhyankar } 1456533e6011SShri Abhyankar 14578fc3a347SHong Zhang /* 14588a76ed4fSHong Zhang ilu() under revised new data structure. 1459813a1f2bSShri Abhyankar Factored arrays bj and ba are stored as 1460813a1f2bSShri Abhyankar L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1461813a1f2bSShri Abhyankar 1462813a1f2bSShri Abhyankar bi=fact->i is an array of size n+1, in which 1463baabb860SHong Zhang bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1464baabb860SHong Zhang bi[n]: points to L(n-1,n-1)+1 1465baabb860SHong Zhang 1466813a1f2bSShri Abhyankar bdiag=fact->diag is an array of size n+1,in which 1467baabb860SHong Zhang bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1468a24f213cSHong Zhang bdiag[n]: points to entry of U(n-1,0)-1 1469813a1f2bSShri Abhyankar 1470813a1f2bSShri Abhyankar U(i,:) contains bdiag[i] as its last entry, i.e., 1471813a1f2bSShri Abhyankar U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1472813a1f2bSShri Abhyankar */ 1473d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1474d71ae5a4SJacob Faibussowitsch { 1475813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1476421480d9SBarry Smith const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag; 1477bbd65245SShri Abhyankar PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 14781df811f5SHong Zhang IS isicol; 1479813a1f2bSShri Abhyankar 1480813a1f2bSShri Abhyankar PetscFunctionBegin; 14819566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1482421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 14839566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 148457508eceSPierre Jolivet b = (Mat_SeqAIJ *)fact->data; 1485813a1f2bSShri Abhyankar 1486813a1f2bSShri Abhyankar /* allocate matrix arrays for new data structure */ 148784648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a)); 148884648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j)); 14899f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i)); 14909f0612e4SBarry Smith if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 14919f0612e4SBarry Smith b->free_a = PETSC_TRUE; 14929f0612e4SBarry Smith b->free_ij = PETSC_TRUE; 14932205254eSKarl Rupp 1494aa624791SPierre Jolivet if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 1495813a1f2bSShri Abhyankar bdiag = b->diag; 1496813a1f2bSShri Abhyankar 1497813a1f2bSShri Abhyankar /* set bi and bj with new data structure */ 1498813a1f2bSShri Abhyankar bi = b->i; 1499813a1f2bSShri Abhyankar bj = b->j; 1500813a1f2bSShri Abhyankar 1501813a1f2bSShri Abhyankar /* L part */ 1502e60cf9a0SBarry Smith bi[0] = 0; 1503813a1f2bSShri Abhyankar for (i = 0; i < n; i++) { 1504813a1f2bSShri Abhyankar nz = adiag[i] - ai[i]; 1505813a1f2bSShri Abhyankar bi[i + 1] = bi[i] + nz; 1506813a1f2bSShri Abhyankar aj = a->j + ai[i]; 1507ad540459SPierre Jolivet for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1508813a1f2bSShri Abhyankar } 1509813a1f2bSShri Abhyankar 1510813a1f2bSShri Abhyankar /* U part */ 151162fbd6afSShri Abhyankar bdiag[n] = bi[n] - 1; 1512813a1f2bSShri Abhyankar for (i = n - 1; i >= 0; i--) { 1513813a1f2bSShri Abhyankar nz = ai[i + 1] - adiag[i] - 1; 1514813a1f2bSShri Abhyankar aj = a->j + adiag[i] + 1; 1515ad540459SPierre Jolivet for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1516813a1f2bSShri Abhyankar /* diag[i] */ 1517bbd65245SShri Abhyankar bj[k++] = i; 1518a24f213cSHong Zhang bdiag[i] = bdiag[i + 1] + nz + 1; 1519813a1f2bSShri Abhyankar } 15201df811f5SHong Zhang 1521d5f3da31SBarry Smith fact->factortype = MAT_FACTOR_ILU; 15221df811f5SHong Zhang fact->info.factor_mallocs = 0; 15231df811f5SHong Zhang fact->info.fill_ratio_given = info->fill; 15241df811f5SHong Zhang fact->info.fill_ratio_needed = 1.0; 152535233d99SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 15269566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 15271df811f5SHong Zhang 152857508eceSPierre Jolivet b = (Mat_SeqAIJ *)fact->data; 15291df811f5SHong Zhang b->row = isrow; 15301df811f5SHong Zhang b->col = iscol; 15311df811f5SHong Zhang b->icol = isicol; 153284648c2dSPierre Jolivet PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work)); 15339566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 15349566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 15353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1536813a1f2bSShri Abhyankar } 1537813a1f2bSShri Abhyankar 1538d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1539d71ae5a4SJacob Faibussowitsch { 1540813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1541813a1f2bSShri Abhyankar IS isicol; 1542813a1f2bSShri Abhyankar const PetscInt *r, *ic; 15431df811f5SHong Zhang PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1544813a1f2bSShri Abhyankar PetscInt *bi, *cols, nnz, *cols_lvl; 1545813a1f2bSShri Abhyankar PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1546813a1f2bSShri Abhyankar PetscInt i, levels, diagonal_fill; 1547421480d9SBarry Smith PetscBool col_identity, row_identity; 1548813a1f2bSShri Abhyankar PetscReal f; 15490298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1550813a1f2bSShri Abhyankar PetscBT lnkbt; 1551813a1f2bSShri Abhyankar PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 15520298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 15530298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1554421480d9SBarry Smith PetscBool diagDense; 1555813a1f2bSShri Abhyankar 1556813a1f2bSShri Abhyankar PetscFunctionBegin; 155708401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1558421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense)); 1559421480d9SBarry Smith PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 1560ad04f41aSHong Zhang 1561813a1f2bSShri Abhyankar levels = (PetscInt)info->levels; 15629566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 15639566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 1564813a1f2bSShri Abhyankar if (!levels && row_identity && col_identity) { 1565813a1f2bSShri Abhyankar /* special case: ilu(0) with natural ordering */ 15669566063dSJacob Faibussowitsch PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 15674d12350bSJunchao Zhang if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 15683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1569813a1f2bSShri Abhyankar } 1570813a1f2bSShri Abhyankar 15719566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 15729566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 15739566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 1574813a1f2bSShri Abhyankar 15751bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 15769f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 15779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1578e60cf9a0SBarry Smith bi[0] = bdiag[0] = 0; 15799566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1580813a1f2bSShri Abhyankar 1581813a1f2bSShri Abhyankar /* create a linked list for storing column indices of the active row */ 1582813a1f2bSShri Abhyankar nlnk = n + 1; 15839566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1584813a1f2bSShri Abhyankar 1585813a1f2bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 15861df811f5SHong Zhang f = info->fill; 15871df811f5SHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 15889566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1589813a1f2bSShri Abhyankar current_space = free_space; 15909566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1591813a1f2bSShri Abhyankar current_space_lvl = free_space_lvl; 1592813a1f2bSShri Abhyankar for (i = 0; i < n; i++) { 1593813a1f2bSShri Abhyankar nzi = 0; 1594813a1f2bSShri Abhyankar /* copy current row into linked list */ 1595813a1f2bSShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 159628b400f6SJacob Faibussowitsch PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1597813a1f2bSShri Abhyankar cols = aj + ai[r[i]]; 1598813a1f2bSShri Abhyankar lnk[i] = -1; /* marker to indicate if diagonal exists */ 15999566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1600813a1f2bSShri Abhyankar nzi += nlnk; 1601813a1f2bSShri Abhyankar 1602813a1f2bSShri Abhyankar /* make sure diagonal entry is included */ 1603813a1f2bSShri Abhyankar if (diagonal_fill && lnk[i] == -1) { 1604813a1f2bSShri Abhyankar fm = n; 1605813a1f2bSShri Abhyankar while (lnk[fm] < i) fm = lnk[fm]; 1606813a1f2bSShri Abhyankar lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1607813a1f2bSShri Abhyankar lnk[fm] = i; 1608813a1f2bSShri Abhyankar lnk_lvl[i] = 0; 16099371c9d4SSatish Balay nzi++; 16109371c9d4SSatish Balay dcount++; 1611813a1f2bSShri Abhyankar } 1612813a1f2bSShri Abhyankar 1613813a1f2bSShri Abhyankar /* add pivot rows into the active row */ 1614813a1f2bSShri Abhyankar nzbd = 0; 1615813a1f2bSShri Abhyankar prow = lnk[n]; 1616813a1f2bSShri Abhyankar while (prow < i) { 1617813a1f2bSShri Abhyankar nnz = bdiag[prow]; 1618813a1f2bSShri Abhyankar cols = bj_ptr[prow] + nnz + 1; 1619813a1f2bSShri Abhyankar cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1620813a1f2bSShri Abhyankar nnz = bi[prow + 1] - bi[prow] - nnz - 1; 16219566063dSJacob Faibussowitsch PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1622813a1f2bSShri Abhyankar nzi += nlnk; 1623813a1f2bSShri Abhyankar prow = lnk[prow]; 1624813a1f2bSShri Abhyankar nzbd++; 1625813a1f2bSShri Abhyankar } 1626813a1f2bSShri Abhyankar bdiag[i] = nzbd; 1627813a1f2bSShri Abhyankar bi[i + 1] = bi[i] + nzi; 1628813a1f2bSShri Abhyankar /* if free space is not available, make more free space */ 1629813a1f2bSShri Abhyankar if (current_space->local_remaining < nzi) { 1630f91af8c7SBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 16319566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 16329566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1633813a1f2bSShri Abhyankar reallocs++; 1634813a1f2bSShri Abhyankar } 1635813a1f2bSShri Abhyankar 1636813a1f2bSShri Abhyankar /* copy data into free_space and free_space_lvl, then initialize lnk */ 16379566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1638813a1f2bSShri Abhyankar bj_ptr[i] = current_space->array; 1639813a1f2bSShri Abhyankar bjlvl_ptr[i] = current_space_lvl->array; 1640813a1f2bSShri Abhyankar 1641813a1f2bSShri Abhyankar /* make sure the active row i has diagonal entry */ 164200045ab3SPierre Jolivet PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1643813a1f2bSShri Abhyankar 1644813a1f2bSShri Abhyankar current_space->array += nzi; 1645813a1f2bSShri Abhyankar current_space->local_used += nzi; 1646813a1f2bSShri Abhyankar current_space->local_remaining -= nzi; 1647813a1f2bSShri Abhyankar current_space_lvl->array += nzi; 1648813a1f2bSShri Abhyankar current_space_lvl->local_used += nzi; 1649813a1f2bSShri Abhyankar current_space_lvl->local_remaining -= nzi; 1650813a1f2bSShri Abhyankar } 1651813a1f2bSShri Abhyankar 16529566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 16539566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 1654813a1f2bSShri Abhyankar /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 165584648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj)); 16569566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1657813a1f2bSShri Abhyankar 16589566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 16599566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 16609566063dSJacob Faibussowitsch PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1661813a1f2bSShri Abhyankar 1662813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO) 1663813a1f2bSShri Abhyankar { 1664aef85c9fSShri Abhyankar PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 16659566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 16669566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 16679566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 16689566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 166948a46eb9SPierre Jolivet if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1670813a1f2bSShri Abhyankar } 1671813a1f2bSShri Abhyankar #endif 1672813a1f2bSShri Abhyankar /* put together the new matrix */ 16739566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 167457508eceSPierre Jolivet b = (Mat_SeqAIJ *)fact->data; 1675813a1f2bSShri Abhyankar b->free_ij = PETSC_TRUE; 16769f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 16779f0612e4SBarry Smith b->free_a = PETSC_TRUE; 1678813a1f2bSShri Abhyankar b->j = bj; 1679813a1f2bSShri Abhyankar b->i = bi; 1680813a1f2bSShri Abhyankar b->diag = bdiag; 1681f4259b30SLisandro Dalcin b->ilen = NULL; 1682f4259b30SLisandro Dalcin b->imax = NULL; 1683813a1f2bSShri Abhyankar b->row = isrow; 1684813a1f2bSShri Abhyankar b->col = iscol; 16859566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 16869566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 1687813a1f2bSShri Abhyankar b->icol = isicol; 16882205254eSKarl Rupp 168984648c2dSPierre Jolivet PetscCall(PetscMalloc1(n, &b->solve_work)); 1690813a1f2bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. 1691813a1f2bSShri Abhyankar Allocate bdiag, solve_work, new a, new j */ 1692f268cba8SShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 16932205254eSKarl Rupp 169457508eceSPierre Jolivet fact->info.factor_mallocs = reallocs; 169557508eceSPierre Jolivet fact->info.fill_ratio_given = f; 169657508eceSPierre Jolivet fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 169757508eceSPierre Jolivet fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 16984d12350bSJunchao Zhang if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 16999566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 17003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1701813a1f2bSShri Abhyankar } 1702813a1f2bSShri Abhyankar 1703d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 1704d71ae5a4SJacob Faibussowitsch { 17055f44c854SHong Zhang Mat C = B; 17065f44c854SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 17075f44c854SHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 17085f44c854SHong Zhang IS ip = b->row, iip = b->icol; 17095f44c854SHong Zhang const PetscInt *rip, *riip; 17105f44c854SHong Zhang PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 17115f44c854SHong Zhang PetscInt *ai = a->i, *aj = a->j; 17125f44c854SHong Zhang PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 1713b65878eeSJunchao Zhang MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi; 1714ace3abfcSBarry Smith PetscBool perm_identity; 1715d90ac83dSHong Zhang FactorShiftCtx sctx; 171698a93161SHong Zhang PetscReal rs; 1717b65878eeSJunchao Zhang const MatScalar *aa, *v; 1718b65878eeSJunchao Zhang MatScalar d; 1719421480d9SBarry Smith const PetscInt *adiag; 172098a93161SHong Zhang 17215f44c854SHong Zhang PetscFunctionBegin; 1722421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1723b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 172498a93161SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 17259566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 172698a93161SHong Zhang 1727f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 172898a93161SHong Zhang sctx.shift_top = info->zeropivot; 172998a93161SHong Zhang for (i = 0; i < mbs; i++) { 173098a93161SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1731421480d9SBarry Smith d = aa[adiag[i]]; 173298a93161SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 173398a93161SHong Zhang v = aa + ai[i]; 173498a93161SHong Zhang nz = ai[i + 1] - ai[i]; 17352205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 173698a93161SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 173798a93161SHong Zhang } 173898a93161SHong Zhang sctx.shift_top *= 1.1; 173998a93161SHong Zhang sctx.nshift_max = 5; 174098a93161SHong Zhang sctx.shift_lo = 0.; 174198a93161SHong Zhang sctx.shift_hi = 1.; 174298a93161SHong Zhang } 17435f44c854SHong Zhang 17449566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 17459566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iip, &riip)); 17465f44c854SHong Zhang 17475f44c854SHong Zhang /* allocate working arrays 17485f44c854SHong Zhang c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 17495f44c854SHong Zhang il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 17505f44c854SHong Zhang */ 17519566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 17525f44c854SHong Zhang 175398a93161SHong Zhang do { 175407b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 175598a93161SHong Zhang 1756c5546cabSHong Zhang for (i = 0; i < mbs; i++) c2r[i] = mbs; 17572e987568SSatish Balay if (mbs) il[0] = 0; 17585f44c854SHong Zhang 17595f44c854SHong Zhang for (k = 0; k < mbs; k++) { 17605f44c854SHong Zhang /* zero rtmp */ 17615f44c854SHong Zhang nz = bi[k + 1] - bi[k]; 17625f44c854SHong Zhang bjtmp = bj + bi[k]; 17635f44c854SHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 17645f44c854SHong Zhang 17655f44c854SHong Zhang /* load in initial unfactored row */ 17665f44c854SHong Zhang bval = ba + bi[k]; 17679371c9d4SSatish Balay jmin = ai[rip[k]]; 17689371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 17695f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 17705f44c854SHong Zhang col = riip[aj[j]]; 17715f44c854SHong Zhang if (col >= k) { /* only take upper triangular entry */ 17725f44c854SHong Zhang rtmp[col] = aa[j]; 17735f44c854SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 17745f44c854SHong Zhang } 17755f44c854SHong Zhang } 177698a93161SHong Zhang /* shift the diagonal of the matrix: ZeropivotApply() */ 177798a93161SHong Zhang rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 17785f44c854SHong Zhang 17795f44c854SHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 17805f44c854SHong Zhang dk = rtmp[k]; 17815f44c854SHong Zhang i = c2r[k]; /* first row to be added to k_th row */ 17825f44c854SHong Zhang 17835f44c854SHong Zhang while (i < k) { 17845f44c854SHong Zhang nexti = c2r[i]; /* next row to be added to k_th row */ 17855f44c854SHong Zhang 17865f44c854SHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 17875f44c854SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 17885f44c854SHong Zhang uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 17895f44c854SHong Zhang dk += uikdi * ba[ili]; /* update diag[k] */ 17905f44c854SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 17915f44c854SHong Zhang 17925f44c854SHong Zhang /* add multiple of row i to k-th row */ 17939371c9d4SSatish Balay jmin = ili + 1; 17949371c9d4SSatish Balay jmax = bi[i + 1]; 17955f44c854SHong Zhang if (jmin < jmax) { 17965f44c854SHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 17975f44c854SHong Zhang /* update il and c2r for row i */ 17985f44c854SHong Zhang il[i] = jmin; 17999371c9d4SSatish Balay j = bj[jmin]; 18009371c9d4SSatish Balay c2r[i] = c2r[j]; 18019371c9d4SSatish Balay c2r[j] = i; 18025f44c854SHong Zhang } 18035f44c854SHong Zhang i = nexti; 18045f44c854SHong Zhang } 18055f44c854SHong Zhang 18065f44c854SHong Zhang /* copy data into U(k,:) */ 180798a93161SHong Zhang rs = 0.0; 18089371c9d4SSatish Balay jmin = bi[k]; 18099371c9d4SSatish Balay jmax = bi[k + 1] - 1; 18105f44c854SHong Zhang if (jmin < jmax) { 18115f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 18129371c9d4SSatish Balay col = bj[j]; 18139371c9d4SSatish Balay ba[j] = rtmp[col]; 18149371c9d4SSatish Balay rs += PetscAbsScalar(ba[j]); 18155f44c854SHong Zhang } 18165f44c854SHong Zhang /* add the k-th row into il and c2r */ 18175f44c854SHong Zhang il[k] = jmin; 18189371c9d4SSatish Balay i = bj[jmin]; 18199371c9d4SSatish Balay c2r[k] = c2r[i]; 18209371c9d4SSatish Balay c2r[i] = k; 18215f44c854SHong Zhang } 182298a93161SHong Zhang 182398a93161SHong Zhang /* MatPivotCheck() */ 182498a93161SHong Zhang sctx.rs = rs; 182598a93161SHong Zhang sctx.pv = dk; 18269566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 182707b50cabSHong Zhang if (sctx.newshift) break; 182898a93161SHong Zhang dk = sctx.pv; 182998a93161SHong Zhang 183098a93161SHong Zhang ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 183198a93161SHong Zhang } 183207b50cabSHong Zhang } while (sctx.newshift); 18335f44c854SHong Zhang 1834b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 18359566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, c2r)); 18369566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 18379566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iip, &riip)); 18385f44c854SHong Zhang 18399566063dSJacob Faibussowitsch PetscCall(ISIdentity(ip, &perm_identity)); 18405f44c854SHong Zhang if (perm_identity) { 184135233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 184235233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 18432169352eSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 18442169352eSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 18455f44c854SHong Zhang } else { 184635233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1; 184735233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 184835233d99SShri Abhyankar B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 184935233d99SShri Abhyankar B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 18505f44c854SHong Zhang } 18515f44c854SHong Zhang 18525f44c854SHong Zhang C->assembled = PETSC_TRUE; 18535f44c854SHong Zhang C->preallocated = PETSC_TRUE; 18542205254eSKarl Rupp 18559566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->n)); 185698a93161SHong Zhang 185798a93161SHong Zhang /* MatPivotView() */ 185898a93161SHong Zhang if (sctx.nshift) { 1859f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 18609566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 1861f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 18629566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1863f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 18649566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 186598a93161SHong Zhang } 186698a93161SHong Zhang } 18673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18685f44c854SHong Zhang } 18695f44c854SHong Zhang 1870d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 1871d71ae5a4SJacob Faibussowitsch { 1872719d5645SBarry Smith Mat C = B; 18730968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 18742ed133dbSHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 18759bfd6278SHong Zhang IS ip = b->row, iip = b->icol; 18765d0c19d7SBarry Smith const PetscInt *rip, *riip; 1877c5546cabSHong Zhang PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 18782ed133dbSHong Zhang PetscInt *ai = a->i, *aj = a->j; 18792ed133dbSHong Zhang PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 1880b65878eeSJunchao Zhang MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi; 1881b65878eeSJunchao Zhang const MatScalar *aa, *v; 1882ace3abfcSBarry Smith PetscBool perm_identity; 18830e95ead3SHong Zhang FactorShiftCtx sctx; 18840e95ead3SHong Zhang PetscReal rs; 1885b65878eeSJunchao Zhang MatScalar d; 1886421480d9SBarry Smith const PetscInt *adiag; 1887d5d45c9bSBarry Smith 1888a6175056SHong Zhang PetscFunctionBegin; 1889421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1890b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 18910e95ead3SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 18929566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 18930e95ead3SHong Zhang 18940e95ead3SHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 18950e95ead3SHong Zhang sctx.shift_top = info->zeropivot; 18960e95ead3SHong Zhang for (i = 0; i < mbs; i++) { 18970e95ead3SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1898421480d9SBarry Smith d = aa[adiag[i]]; 18990e95ead3SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 19000e95ead3SHong Zhang v = aa + ai[i]; 19010e95ead3SHong Zhang nz = ai[i + 1] - ai[i]; 19022205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 19030e95ead3SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 19040e95ead3SHong Zhang } 19050e95ead3SHong Zhang sctx.shift_top *= 1.1; 19060e95ead3SHong Zhang sctx.nshift_max = 5; 19070e95ead3SHong Zhang sctx.shift_lo = 0.; 19080e95ead3SHong Zhang sctx.shift_hi = 1.; 19090e95ead3SHong Zhang } 1910ee45ca4aSHong Zhang 19119566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 19129566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iip, &riip)); 19132ed133dbSHong Zhang 19142ed133dbSHong Zhang /* initialization */ 19159566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 19160e95ead3SHong Zhang 19172ed133dbSHong Zhang do { 191807b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 19190e95ead3SHong Zhang 1920c5546cabSHong Zhang for (i = 0; i < mbs; i++) jl[i] = mbs; 1921c5546cabSHong Zhang il[0] = 0; 19222ed133dbSHong Zhang 19232ed133dbSHong Zhang for (k = 0; k < mbs; k++) { 1924c5546cabSHong Zhang /* zero rtmp */ 1925c5546cabSHong Zhang nz = bi[k + 1] - bi[k]; 1926c5546cabSHong Zhang bjtmp = bj + bi[k]; 1927c5546cabSHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1928c5546cabSHong Zhang 1929c05c3958SHong Zhang bval = ba + bi[k]; 19302ed133dbSHong Zhang /* initialize k-th row by the perm[k]-th row of A */ 19319371c9d4SSatish Balay jmin = ai[rip[k]]; 19329371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 19332ed133dbSHong Zhang for (j = jmin; j < jmax; j++) { 19349bfd6278SHong Zhang col = riip[aj[j]]; 19352ed133dbSHong Zhang if (col >= k) { /* only take upper triangular entry */ 19362ed133dbSHong Zhang rtmp[col] = aa[j]; 1937c05c3958SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 19382ed133dbSHong Zhang } 19392ed133dbSHong Zhang } 194039e3d630SHong Zhang /* shift the diagonal of the matrix */ 1941540703acSHong Zhang if (sctx.nshift) rtmp[k] += sctx.shift_amount; 19422ed133dbSHong Zhang 19432ed133dbSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 19442ed133dbSHong Zhang dk = rtmp[k]; 19452ed133dbSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 19462ed133dbSHong Zhang 19472ed133dbSHong Zhang while (i < k) { 19482ed133dbSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 19492ed133dbSHong Zhang 19502ed133dbSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 19512ed133dbSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 19522ed133dbSHong Zhang uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 19532ed133dbSHong Zhang dk += uikdi * ba[ili]; 19542ed133dbSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 19552ed133dbSHong Zhang 19562ed133dbSHong Zhang /* add multiple of row i to k-th row */ 19579371c9d4SSatish Balay jmin = ili + 1; 19589371c9d4SSatish Balay jmax = bi[i + 1]; 19592ed133dbSHong Zhang if (jmin < jmax) { 19602ed133dbSHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 19612ed133dbSHong Zhang /* update il and jl for row i */ 19622ed133dbSHong Zhang il[i] = jmin; 19639371c9d4SSatish Balay j = bj[jmin]; 19649371c9d4SSatish Balay jl[i] = jl[j]; 19659371c9d4SSatish Balay jl[j] = i; 19662ed133dbSHong Zhang } 19672ed133dbSHong Zhang i = nexti; 19682ed133dbSHong Zhang } 19692ed133dbSHong Zhang 19700697b6caSHong Zhang /* shift the diagonals when zero pivot is detected */ 19710697b6caSHong Zhang /* compute rs=sum of abs(off-diagonal) */ 19720697b6caSHong Zhang rs = 0.0; 19733c9e8598SHong Zhang jmin = bi[k] + 1; 19743c9e8598SHong Zhang nz = bi[k + 1] - jmin; 19753c9e8598SHong Zhang bcol = bj + jmin; 1976ad540459SPierre Jolivet for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 1977540703acSHong Zhang 1978540703acSHong Zhang sctx.rs = rs; 1979540703acSHong Zhang sctx.pv = dk; 19809566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 198107b50cabSHong Zhang if (sctx.newshift) break; 19820e95ead3SHong Zhang dk = sctx.pv; 19833c9e8598SHong Zhang 19843c9e8598SHong Zhang /* copy data into U(k,:) */ 198539e3d630SHong Zhang ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 19869371c9d4SSatish Balay jmin = bi[k] + 1; 19879371c9d4SSatish Balay jmax = bi[k + 1]; 198839e3d630SHong Zhang if (jmin < jmax) { 198939e3d630SHong Zhang for (j = jmin; j < jmax; j++) { 19909371c9d4SSatish Balay col = bj[j]; 19919371c9d4SSatish Balay ba[j] = rtmp[col]; 19923c9e8598SHong Zhang } 199339e3d630SHong Zhang /* add the k-th row into il and jl */ 19943c9e8598SHong Zhang il[k] = jmin; 19959371c9d4SSatish Balay i = bj[jmin]; 19969371c9d4SSatish Balay jl[k] = jl[i]; 19979371c9d4SSatish Balay jl[i] = k; 19983c9e8598SHong Zhang } 19993c9e8598SHong Zhang } 200007b50cabSHong Zhang } while (sctx.newshift); 20010e95ead3SHong Zhang 20029566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, jl)); 20039566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 20049566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iip, &riip)); 2005b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 2006db4efbfdSBarry Smith 20079566063dSJacob Faibussowitsch PetscCall(ISIdentity(ip, &perm_identity)); 2008db4efbfdSBarry Smith if (perm_identity) { 20090a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 20100a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 20110a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 20120a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2013db4efbfdSBarry Smith } else { 20140a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 20150a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 20160a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 20170a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2018db4efbfdSBarry Smith } 2019db4efbfdSBarry Smith 20203c9e8598SHong Zhang C->assembled = PETSC_TRUE; 20213c9e8598SHong Zhang C->preallocated = PETSC_TRUE; 20222205254eSKarl Rupp 20239566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->n)); 2024540703acSHong Zhang if (sctx.nshift) { 2025f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 20269566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2027f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 20289566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 20290697b6caSHong Zhang } 20303c9e8598SHong Zhang } 20313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20323c9e8598SHong Zhang } 2033a6175056SHong Zhang 20348a76ed4fSHong Zhang /* 20358a76ed4fSHong Zhang icc() under revised new data structure. 20368a76ed4fSHong Zhang Factored arrays bj and ba are stored as 20378a76ed4fSHong Zhang U(0,:),...,U(i,:),U(n-1,:) 20388a76ed4fSHong Zhang 20398a76ed4fSHong Zhang ui=fact->i is an array of size n+1, in which 20408a76ed4fSHong Zhang ui+ 20418a76ed4fSHong Zhang ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 20428a76ed4fSHong Zhang ui[n]: points to U(n-1,n-1)+1 20438a76ed4fSHong Zhang 20448a76ed4fSHong Zhang udiag=fact->diag is an array of size n,in which 20458a76ed4fSHong Zhang udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 20468a76ed4fSHong Zhang 20478a76ed4fSHong Zhang U(i,:) contains udiag[i] as its last entry, i.e., 20488a76ed4fSHong Zhang U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 20498a76ed4fSHong Zhang */ 20508a76ed4fSHong Zhang 2051d71ae5a4SJacob Faibussowitsch PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2052d71ae5a4SJacob Faibussowitsch { 20530968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2054ed59401aSHong Zhang Mat_SeqSBAIJ *b; 2055421480d9SBarry Smith PetscBool perm_identity; 20565f44c854SHong Zhang PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2057421480d9SBarry Smith const PetscInt *rip, *riip, *adiag; 2058ed59401aSHong Zhang PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2059421480d9SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL; 20605a8e39fbSHong Zhang PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2061ed59401aSHong Zhang PetscReal fill = info->fill, levels = info->levels; 20620298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 20630298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 20647a48dd6fSHong Zhang PetscBT lnkbt; 2065b635d36bSHong Zhang IS iperm; 2066421480d9SBarry Smith PetscBool diagDense; 2067a6175056SHong Zhang 2068a6175056SHong Zhang PetscFunctionBegin; 206908401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2070421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense)); 2071421480d9SBarry Smith PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 20729566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 20739566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 20747a48dd6fSHong Zhang 20759f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 20769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 207739e3d630SHong Zhang ui[0] = 0; 207839e3d630SHong Zhang 20798a76ed4fSHong Zhang /* ICC(0) without matrix ordering: simply rearrange column indices */ 2080622e2028SHong Zhang if (!levels && perm_identity) { 20815f44c854SHong Zhang for (i = 0; i < am; i++) { 2082421480d9SBarry Smith ncols = ai[i + 1] - adiag[i]; 2083c5546cabSHong Zhang ui[i + 1] = ui[i] + ncols; 2084c5546cabSHong Zhang udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2085dc1e2a3fSHong Zhang } 20869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2087dc1e2a3fSHong Zhang cols = uj; 2088dc1e2a3fSHong Zhang for (i = 0; i < am; i++) { 2089421480d9SBarry Smith aj = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2090421480d9SBarry Smith ncols = ai[i + 1] - adiag[i] - 1; 20915f44c854SHong Zhang for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2092910cf402Sprj- *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 20935f44c854SHong Zhang } 20945f44c854SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 20959566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 20969566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 20975f44c854SHong Zhang 20985f44c854SHong Zhang /* initialization */ 20999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ajtmp)); 21005f44c854SHong Zhang 21015f44c854SHong Zhang /* jl: linked list for storing indices of the pivot rows 21025f44c854SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 21039566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 21045f44c854SHong Zhang for (i = 0; i < am; i++) { 21059371c9d4SSatish Balay jl[i] = am; 21069371c9d4SSatish Balay il[i] = 0; 21075f44c854SHong Zhang } 21085f44c854SHong Zhang 21095f44c854SHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 21105f44c854SHong Zhang nlnk = am + 1; 21119566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 21125f44c854SHong Zhang 211395b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 21149566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 21155f44c854SHong Zhang current_space = free_space; 21169566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 21175f44c854SHong Zhang current_space_lvl = free_space_lvl; 21185f44c854SHong Zhang 21195f44c854SHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 21205f44c854SHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 21215f44c854SHong Zhang nzk = 0; 21225f44c854SHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 212328b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 21245f44c854SHong Zhang ncols_upper = 0; 21255f44c854SHong Zhang for (j = 0; j < ncols; j++) { 21265f44c854SHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 21275f44c854SHong Zhang if (riip[i] >= k) { /* only take upper triangular entry */ 21285f44c854SHong Zhang ajtmp[ncols_upper] = i; 21295f44c854SHong Zhang ncols_upper++; 21305f44c854SHong Zhang } 21315f44c854SHong Zhang } 21329566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 21335f44c854SHong Zhang nzk += nlnk; 21345f44c854SHong Zhang 21355f44c854SHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 21365f44c854SHong Zhang prow = jl[k]; /* 1st pivot row */ 21375f44c854SHong Zhang 21385f44c854SHong Zhang while (prow < k) { 21395f44c854SHong Zhang nextprow = jl[prow]; 21405f44c854SHong Zhang 21415f44c854SHong Zhang /* merge prow into k-th row */ 21425f44c854SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 21435f44c854SHong Zhang jmax = ui[prow + 1]; 21445f44c854SHong Zhang ncols = jmax - jmin; 21455f44c854SHong Zhang i = jmin - ui[prow]; 21465f44c854SHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 21475f44c854SHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 21485f44c854SHong Zhang j = *(uj - 1); 21499566063dSJacob Faibussowitsch PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 21505f44c854SHong Zhang nzk += nlnk; 21515f44c854SHong Zhang 21525f44c854SHong Zhang /* update il and jl for prow */ 21535f44c854SHong Zhang if (jmin < jmax) { 21545f44c854SHong Zhang il[prow] = jmin; 21559371c9d4SSatish Balay j = *cols; 21569371c9d4SSatish Balay jl[prow] = jl[j]; 21579371c9d4SSatish Balay jl[j] = prow; 21585f44c854SHong Zhang } 21595f44c854SHong Zhang prow = nextprow; 21605f44c854SHong Zhang } 21615f44c854SHong Zhang 21625f44c854SHong Zhang /* if free space is not available, make more free space */ 21635f44c854SHong Zhang if (current_space->local_remaining < nzk) { 21645f44c854SHong Zhang i = am - k + 1; /* num of unfactored rows */ 2165f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 21669566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 21679566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 21685f44c854SHong Zhang reallocs++; 21695f44c854SHong Zhang } 21705f44c854SHong Zhang 21715f44c854SHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 217208401ef6SPierre Jolivet PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 21739566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 21745f44c854SHong Zhang 21755f44c854SHong Zhang /* add the k-th row into il and jl */ 21765f44c854SHong Zhang if (nzk > 1) { 21775f44c854SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 21789371c9d4SSatish Balay jl[k] = jl[i]; 21799371c9d4SSatish Balay jl[i] = k; 21805f44c854SHong Zhang il[k] = ui[k] + 1; 21815f44c854SHong Zhang } 21825f44c854SHong Zhang uj_ptr[k] = current_space->array; 21835f44c854SHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 21845f44c854SHong Zhang 21855f44c854SHong Zhang current_space->array += nzk; 21865f44c854SHong Zhang current_space->local_used += nzk; 21875f44c854SHong Zhang current_space->local_remaining -= nzk; 21885f44c854SHong Zhang 21895f44c854SHong Zhang current_space_lvl->array += nzk; 21905f44c854SHong Zhang current_space_lvl->local_used += nzk; 21915f44c854SHong Zhang current_space_lvl->local_remaining -= nzk; 21925f44c854SHong Zhang 21935f44c854SHong Zhang ui[k + 1] = ui[k] + nzk; 21945f44c854SHong Zhang } 21955f44c854SHong Zhang 21969566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 21979566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 21989566063dSJacob Faibussowitsch PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 21999566063dSJacob Faibussowitsch PetscCall(PetscFree(ajtmp)); 22005f44c854SHong Zhang 22019263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 22029f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj)); 22039566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 22049566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 22059566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 22065f44c854SHong Zhang 22075f44c854SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 22085f44c854SHong Zhang 22095f44c854SHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 221057508eceSPierre Jolivet b = (Mat_SeqSBAIJ *)fact->data; 22119f0612e4SBarry Smith b->free_ij = PETSC_TRUE; 221284648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a)); 22139f0612e4SBarry Smith b->free_a = PETSC_TRUE; 22145f44c854SHong Zhang b->j = uj; 22155f44c854SHong Zhang b->i = ui; 22165f44c854SHong Zhang b->diag = udiag; 2217f4259b30SLisandro Dalcin b->ilen = NULL; 2218f4259b30SLisandro Dalcin b->imax = NULL; 22195f44c854SHong Zhang b->row = perm; 22205f44c854SHong Zhang b->col = perm; 22219566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 22229566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 22235f44c854SHong Zhang b->icol = iperm; 22245f44c854SHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 22252205254eSKarl Rupp 222684648c2dSPierre Jolivet PetscCall(PetscMalloc1(am, &b->solve_work)); 22272205254eSKarl Rupp 22285f44c854SHong Zhang b->maxnz = b->nz = ui[am]; 22295f44c854SHong Zhang 2230f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2231f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 22325f44c854SHong Zhang if (ai[am] != 0) { 22336a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 223495b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 22355f44c854SHong Zhang } else { 2236f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 22375f44c854SHong Zhang } 22389263d837SHong Zhang #if defined(PETSC_USE_INFO) 22399263d837SHong Zhang if (ai[am] != 0) { 22409263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 22419566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 22429566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 22439566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 22449263d837SHong Zhang } else { 22459566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 22469263d837SHong Zhang } 22479263d837SHong Zhang #endif 224835233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 22493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 22505f44c854SHong Zhang } 22515f44c854SHong Zhang 2252d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2253d71ae5a4SJacob Faibussowitsch { 22540be760fbSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 22550be760fbSHong Zhang Mat_SeqSBAIJ *b; 2256421480d9SBarry Smith PetscBool perm_identity; 22570be760fbSHong Zhang PetscReal fill = info->fill; 22580be760fbSHong Zhang const PetscInt *rip, *riip; 22590be760fbSHong Zhang PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 22600be760fbSHong Zhang PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 22610be760fbSHong Zhang PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 22620298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 22630be760fbSHong Zhang PetscBT lnkbt; 22640be760fbSHong Zhang IS iperm; 2265421480d9SBarry Smith PetscBool diagDense; 22660be760fbSHong Zhang 22670be760fbSHong Zhang PetscFunctionBegin; 226808401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2269421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense)); 2270421480d9SBarry Smith PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 22719186b105SHong Zhang 22720be760fbSHong Zhang /* check whether perm is the identity mapping */ 22739566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 22749566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 22759566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 22769566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 22770be760fbSHong Zhang 22780be760fbSHong Zhang /* initialization */ 22799f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 22809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 22810be760fbSHong Zhang ui[0] = 0; 22820be760fbSHong Zhang 22830be760fbSHong Zhang /* jl: linked list for storing indices of the pivot rows 22840be760fbSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 22859566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 22860be760fbSHong Zhang for (i = 0; i < am; i++) { 22879371c9d4SSatish Balay jl[i] = am; 22889371c9d4SSatish Balay il[i] = 0; 22890be760fbSHong Zhang } 22900be760fbSHong Zhang 22910be760fbSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 22920be760fbSHong Zhang nlnk = am + 1; 22939566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 22940be760fbSHong Zhang 229595b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 22969566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 22970be760fbSHong Zhang current_space = free_space; 22980be760fbSHong Zhang 22990be760fbSHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 23000be760fbSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 23010be760fbSHong Zhang nzk = 0; 23020be760fbSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 230328b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 23040be760fbSHong Zhang ncols_upper = 0; 23050be760fbSHong Zhang for (j = 0; j < ncols; j++) { 23060be760fbSHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 23070be760fbSHong Zhang if (i >= k) { /* only take upper triangular entry */ 23080be760fbSHong Zhang cols[ncols_upper] = i; 23090be760fbSHong Zhang ncols_upper++; 23100be760fbSHong Zhang } 23110be760fbSHong Zhang } 23129566063dSJacob Faibussowitsch PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 23130be760fbSHong Zhang nzk += nlnk; 23140be760fbSHong Zhang 23150be760fbSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 23160be760fbSHong Zhang prow = jl[k]; /* 1st pivot row */ 23170be760fbSHong Zhang 23180be760fbSHong Zhang while (prow < k) { 23190be760fbSHong Zhang nextprow = jl[prow]; 23200be760fbSHong Zhang /* merge prow into k-th row */ 23210be760fbSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 23220be760fbSHong Zhang jmax = ui[prow + 1]; 23230be760fbSHong Zhang ncols = jmax - jmin; 23240be760fbSHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 23259566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 23260be760fbSHong Zhang nzk += nlnk; 23270be760fbSHong Zhang 23280be760fbSHong Zhang /* update il and jl for prow */ 23290be760fbSHong Zhang if (jmin < jmax) { 23300be760fbSHong Zhang il[prow] = jmin; 23312205254eSKarl Rupp j = *uj_ptr; 23322205254eSKarl Rupp jl[prow] = jl[j]; 23332205254eSKarl Rupp jl[j] = prow; 23340be760fbSHong Zhang } 23350be760fbSHong Zhang prow = nextprow; 23360be760fbSHong Zhang } 23370be760fbSHong Zhang 23380be760fbSHong Zhang /* if free space is not available, make more free space */ 23390be760fbSHong Zhang if (current_space->local_remaining < nzk) { 23400be760fbSHong Zhang i = am - k + 1; /* num of unfactored rows */ 2341f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 23429566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 23430be760fbSHong Zhang reallocs++; 23440be760fbSHong Zhang } 23450be760fbSHong Zhang 23460be760fbSHong Zhang /* copy data into free space, then initialize lnk */ 23479566063dSJacob Faibussowitsch PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 23480be760fbSHong Zhang 23490be760fbSHong Zhang /* add the k-th row into il and jl */ 23507b056e98SHong Zhang if (nzk > 1) { 23510be760fbSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 23529371c9d4SSatish Balay jl[k] = jl[i]; 23539371c9d4SSatish Balay jl[i] = k; 23540be760fbSHong Zhang il[k] = ui[k] + 1; 23550be760fbSHong Zhang } 23560be760fbSHong Zhang ui_ptr[k] = current_space->array; 23572205254eSKarl Rupp 23580be760fbSHong Zhang current_space->array += nzk; 23590be760fbSHong Zhang current_space->local_used += nzk; 23600be760fbSHong Zhang current_space->local_remaining -= nzk; 23610be760fbSHong Zhang 23620be760fbSHong Zhang ui[k + 1] = ui[k] + nzk; 23630be760fbSHong Zhang } 23640be760fbSHong Zhang 23659566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 23669566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 23679566063dSJacob Faibussowitsch PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 23680be760fbSHong Zhang 23699263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 237084648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj)); 23719566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 23729566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 23730be760fbSHong Zhang 23740be760fbSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 2375f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 23760be760fbSHong Zhang b->free_ij = PETSC_TRUE; 237784648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a)); 23789f0612e4SBarry Smith b->free_a = PETSC_TRUE; 23790be760fbSHong Zhang b->j = uj; 23800be760fbSHong Zhang b->i = ui; 23810be760fbSHong Zhang b->diag = udiag; 2382f4259b30SLisandro Dalcin b->ilen = NULL; 2383f4259b30SLisandro Dalcin b->imax = NULL; 23840be760fbSHong Zhang b->row = perm; 23850be760fbSHong Zhang b->col = perm; 238626fbe8dcSKarl Rupp 23879566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 23889566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 23892205254eSKarl Rupp 23900be760fbSHong Zhang b->icol = iperm; 23910be760fbSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 239226fbe8dcSKarl Rupp 239384648c2dSPierre Jolivet PetscCall(PetscMalloc1(am, &b->solve_work)); 23942205254eSKarl Rupp 23950be760fbSHong Zhang b->maxnz = b->nz = ui[am]; 23960be760fbSHong Zhang 2397f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2398f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 23990be760fbSHong Zhang if (ai[am] != 0) { 24006a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 240195b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 24020be760fbSHong Zhang } else { 2403f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 24040be760fbSHong Zhang } 24059263d837SHong Zhang #if defined(PETSC_USE_INFO) 24069263d837SHong Zhang if (ai[am] != 0) { 24079263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 24089566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 24099566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 24109566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 24119263d837SHong Zhang } else { 24129566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 24139263d837SHong Zhang } 24149263d837SHong Zhang #endif 241535233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 24163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24170be760fbSHong Zhang } 24180be760fbSHong Zhang 2419d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 2420d71ae5a4SJacob Faibussowitsch { 2421813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2422813a1f2bSShri Abhyankar PetscInt n = A->rmap->n; 2423813a1f2bSShri Abhyankar const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 2424813a1f2bSShri Abhyankar PetscScalar *x, sum; 2425813a1f2bSShri Abhyankar const PetscScalar *b; 2426b65878eeSJunchao Zhang const MatScalar *aa, *v; 2427813a1f2bSShri Abhyankar PetscInt i, nz; 2428813a1f2bSShri Abhyankar 2429813a1f2bSShri Abhyankar PetscFunctionBegin; 24303ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 2431813a1f2bSShri Abhyankar 2432b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 24339566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 24349566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 2435813a1f2bSShri Abhyankar 2436813a1f2bSShri Abhyankar /* forward solve the lower triangular */ 2437813a1f2bSShri Abhyankar x[0] = b[0]; 2438813a1f2bSShri Abhyankar v = aa; 2439813a1f2bSShri Abhyankar vi = aj; 2440813a1f2bSShri Abhyankar for (i = 1; i < n; i++) { 2441813a1f2bSShri Abhyankar nz = ai[i + 1] - ai[i]; 2442813a1f2bSShri Abhyankar sum = b[i]; 2443813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum, x, v, vi, nz); 2444813a1f2bSShri Abhyankar v += nz; 2445813a1f2bSShri Abhyankar vi += nz; 2446813a1f2bSShri Abhyankar x[i] = sum; 2447813a1f2bSShri Abhyankar } 2448813a1f2bSShri Abhyankar 2449813a1f2bSShri Abhyankar /* backward solve the upper triangular */ 245062fbd6afSShri Abhyankar for (i = n - 1; i >= 0; i--) { 24513c0119dfSShri Abhyankar v = aa + adiag[i + 1] + 1; 24523c0119dfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 2453813a1f2bSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 2454813a1f2bSShri Abhyankar sum = x[i]; 2455813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum, x, v, vi, nz); 24563c0119dfSShri Abhyankar x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 2457813a1f2bSShri Abhyankar } 2458813a1f2bSShri Abhyankar 24599566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 2460b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 24619566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 24629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 24633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2464813a1f2bSShri Abhyankar } 2465813a1f2bSShri Abhyankar 2466d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 2467d71ae5a4SJacob Faibussowitsch { 2468f268cba8SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2469f268cba8SShri Abhyankar IS iscol = a->col, isrow = a->row; 2470f268cba8SShri Abhyankar PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 2471f268cba8SShri Abhyankar const PetscInt *rout, *cout, *r, *c; 2472f268cba8SShri Abhyankar PetscScalar *x, *tmp, sum; 2473f268cba8SShri Abhyankar const PetscScalar *b; 2474b65878eeSJunchao Zhang const MatScalar *aa, *v; 2475f268cba8SShri Abhyankar 2476f268cba8SShri Abhyankar PetscFunctionBegin; 24773ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 2478f268cba8SShri Abhyankar 2479b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 24809566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 24819566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 2482f268cba8SShri Abhyankar tmp = a->solve_work; 2483f268cba8SShri Abhyankar 24849371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 24859371c9d4SSatish Balay r = rout; 24869371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 24879371c9d4SSatish Balay c = cout; 2488f268cba8SShri Abhyankar 2489f268cba8SShri Abhyankar /* forward solve the lower triangular */ 2490f268cba8SShri Abhyankar tmp[0] = b[r[0]]; 2491f268cba8SShri Abhyankar v = aa; 2492f268cba8SShri Abhyankar vi = aj; 2493f268cba8SShri Abhyankar for (i = 1; i < n; i++) { 2494f268cba8SShri Abhyankar nz = ai[i + 1] - ai[i]; 2495f268cba8SShri Abhyankar sum = b[r[i]]; 2496f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 2497f268cba8SShri Abhyankar tmp[i] = sum; 24989371c9d4SSatish Balay v += nz; 24999371c9d4SSatish Balay vi += nz; 2500f268cba8SShri Abhyankar } 2501f268cba8SShri Abhyankar 2502f268cba8SShri Abhyankar /* backward solve the upper triangular */ 2503f268cba8SShri Abhyankar for (i = n - 1; i >= 0; i--) { 25043c0119dfSShri Abhyankar v = aa + adiag[i + 1] + 1; 25053c0119dfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 2506f268cba8SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 2507f268cba8SShri Abhyankar sum = tmp[i]; 2508f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 2509f268cba8SShri Abhyankar x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 2510f268cba8SShri Abhyankar } 2511f268cba8SShri Abhyankar 25129566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 25139566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 2514b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 25159566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 25169566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 25179566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 25183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2519f268cba8SShri Abhyankar } 2520