1c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 3c6db04a5SJed Brown #include <petscbt.h> 4c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 53b2fbd54SBarry Smith 6d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 7d71ae5a4SJacob Faibussowitsch { 84ac6704cSBarry Smith PetscFunctionBegin; 94ac6704cSBarry Smith *type = MATSOLVERPETSC; 103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 114ac6704cSBarry Smith } 124ac6704cSBarry Smith 13d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 14d71ae5a4SJacob Faibussowitsch { 15d0f46423SBarry Smith PetscInt n = A->rmap->n; 16b24902e0SBarry Smith 17b24902e0SBarry Smith PetscFunctionBegin; 18891bceaeSHong Zhang #if defined(PETSC_USE_COMPLEX) 1903e5aca4SStefano Zampini if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 2003e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n")); 2103e5aca4SStefano Zampini *B = NULL; 2203e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 2303e5aca4SStefano Zampini } 24891bceaeSHong Zhang #endif 2503e5aca4SStefano Zampini 269566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 279566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, n, n, n, n)); 28599ef60dSHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 299566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQAIJ)); 302205254eSKarl Rupp 3135233d99SShri Abhyankar (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 3235233d99SShri Abhyankar (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 332205254eSKarl Rupp 349566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 359566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 369566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 379566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 38b24902e0SBarry Smith } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 399566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQSBAIJ)); 409566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 412205254eSKarl Rupp 4235233d99SShri Abhyankar (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 4335233d99SShri Abhyankar (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 449566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 459566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 46e32f2f54SBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 47d5f3da31SBarry Smith (*B)->factortype = ftype; 4800c67f3bSHong Zhang 499566063dSJacob Faibussowitsch PetscCall(PetscFree((*B)->solvertype)); 509566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 51f73b0415SBarry Smith (*B)->canuseordering = PETSC_TRUE; 529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 54b24902e0SBarry Smith } 55b24902e0SBarry Smith 56d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 57d71ae5a4SJacob Faibussowitsch { 58ce3d78c0SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 59ce3d78c0SShri Abhyankar IS isicol; 608758e1faSBarry Smith const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 618758e1faSBarry Smith PetscInt i, n = A->rmap->n; 628758e1faSBarry Smith PetscInt *bi, *bj; 63ce3d78c0SShri Abhyankar PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 64ce3d78c0SShri Abhyankar PetscReal f; 65ce3d78c0SShri Abhyankar PetscInt nlnk, *lnk, k, **bi_ptr; 660298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 67ce3d78c0SShri Abhyankar PetscBT lnkbt; 68*421480d9SBarry Smith PetscBool diagDense; 69ce3d78c0SShri Abhyankar 70ce3d78c0SShri Abhyankar PetscFunctionBegin; 7108401ef6SPierre Jolivet PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 72*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense)); 73*421480d9SBarry Smith PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 74ece542f9SHong Zhang 759566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 769566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 779566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 78ce3d78c0SShri Abhyankar 791bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 809f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 82a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 839b48462bSShri Abhyankar 849b48462bSShri Abhyankar /* linked list for storing column indices of the active row */ 859b48462bSShri Abhyankar nlnk = n + 1; 869566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 879b48462bSShri Abhyankar 889566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 899b48462bSShri Abhyankar 909b48462bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 919b48462bSShri Abhyankar f = info->fill; 92aa91b3e7SRichard Tran Mills if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 939566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 949b48462bSShri Abhyankar current_space = free_space; 959b48462bSShri Abhyankar 969b48462bSShri Abhyankar for (i = 0; i < n; i++) { 979b48462bSShri Abhyankar /* copy previous fill into linked list */ 989b48462bSShri Abhyankar nzi = 0; 999b48462bSShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 1009b48462bSShri Abhyankar ajtmp = aj + ai[r[i]]; 1019566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 1029b48462bSShri Abhyankar nzi += nlnk; 1039b48462bSShri Abhyankar 1049b48462bSShri Abhyankar /* add pivot rows into linked list */ 1059b48462bSShri Abhyankar row = lnk[n]; 1069b48462bSShri Abhyankar while (row < i) { 1079b48462bSShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 1089b48462bSShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 1099566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 1109b48462bSShri Abhyankar nzi += nlnk; 1119b48462bSShri Abhyankar row = lnk[row]; 1129b48462bSShri Abhyankar } 1139b48462bSShri Abhyankar bi[i + 1] = bi[i] + nzi; 1149b48462bSShri Abhyankar im[i] = nzi; 1159b48462bSShri Abhyankar 1169b48462bSShri Abhyankar /* mark bdiag */ 1179b48462bSShri Abhyankar nzbd = 0; 1189b48462bSShri Abhyankar nnz = nzi; 1199b48462bSShri Abhyankar k = lnk[n]; 1209b48462bSShri Abhyankar while (nnz-- && k < i) { 1219b48462bSShri Abhyankar nzbd++; 1229b48462bSShri Abhyankar k = lnk[k]; 1239b48462bSShri Abhyankar } 1249b48462bSShri Abhyankar bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 1259b48462bSShri Abhyankar 1269b48462bSShri Abhyankar /* if free space is not available, make more free space */ 1279b48462bSShri Abhyankar if (current_space->local_remaining < nzi) { 1282f18eb33SBarry Smith /* estimated additional space needed */ 1291df4278eSBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 1309566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1319b48462bSShri Abhyankar reallocs++; 1329b48462bSShri Abhyankar } 1339b48462bSShri Abhyankar 1349b48462bSShri Abhyankar /* copy data into free space, then initialize lnk */ 1359566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 1362205254eSKarl Rupp 1379b48462bSShri Abhyankar bi_ptr[i] = current_space->array; 1389b48462bSShri Abhyankar current_space->array += nzi; 1399b48462bSShri Abhyankar current_space->local_used += nzi; 1409b48462bSShri Abhyankar current_space->local_remaining -= nzi; 1419b48462bSShri Abhyankar } 1429b48462bSShri Abhyankar 1439566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 1449566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 1459b48462bSShri Abhyankar 1469263d837SHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 14784648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj)); 1489566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1499566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 1509566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 1519b48462bSShri Abhyankar 1529b48462bSShri Abhyankar /* put together the new matrix */ 1539566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 15457508eceSPierre Jolivet b = (Mat_SeqAIJ *)B->data; 1559b48462bSShri Abhyankar b->free_ij = PETSC_TRUE; 1569f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 1579f0612e4SBarry Smith b->free_a = PETSC_TRUE; 1589b48462bSShri Abhyankar b->j = bj; 1599b48462bSShri Abhyankar b->i = bi; 1609b48462bSShri Abhyankar b->diag = bdiag; 161f4259b30SLisandro Dalcin b->ilen = NULL; 162f4259b30SLisandro Dalcin b->imax = NULL; 1639b48462bSShri Abhyankar b->row = isrow; 1649b48462bSShri Abhyankar b->col = iscol; 1659566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 1669566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 1679b48462bSShri Abhyankar b->icol = isicol; 16884648c2dSPierre Jolivet PetscCall(PetscMalloc1(n, &b->solve_work)); 1699b48462bSShri Abhyankar 1709b48462bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 1719b48462bSShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 1722205254eSKarl Rupp 173d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 174f284f12aSHong Zhang B->info.factor_mallocs = reallocs; 175f284f12aSHong Zhang B->info.fill_ratio_given = f; 1769b48462bSShri Abhyankar 1779b48462bSShri Abhyankar if (ai[n]) { 178f284f12aSHong Zhang B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1799b48462bSShri Abhyankar } else { 180f284f12aSHong Zhang B->info.fill_ratio_needed = 0.0; 1819b48462bSShri Abhyankar } 1829263d837SHong Zhang #if defined(PETSC_USE_INFO) 1839263d837SHong Zhang if (ai[n] != 0) { 1849263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 1859566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1869566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 1879566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 1889566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 1899263d837SHong Zhang } else { 1909566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 1919263d837SHong Zhang } 1929263d837SHong Zhang #endif 19335233d99SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1944d12350bSJunchao Zhang if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1959566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 1963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1979b48462bSShri Abhyankar } 1989b48462bSShri Abhyankar 1995b5bc046SBarry Smith /* 2005b5bc046SBarry Smith Trouble in factorization, should we dump the original matrix? 2015b5bc046SBarry Smith */ 202d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorDumpMatrix(Mat A) 203d71ae5a4SJacob Faibussowitsch { 204ace3abfcSBarry Smith PetscBool flg = PETSC_FALSE; 2055b5bc046SBarry Smith 2065b5bc046SBarry Smith PetscFunctionBegin; 2079566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 2085b5bc046SBarry Smith if (flg) { 2095b5bc046SBarry Smith PetscViewer viewer; 2105b5bc046SBarry Smith char filename[PETSC_MAX_PATH_LEN]; 2115b5bc046SBarry Smith 2129566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 2139566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 2149566063dSJacob Faibussowitsch PetscCall(MatView(A, viewer)); 2159566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&viewer)); 2165b5bc046SBarry Smith } 2173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2185b5bc046SBarry Smith } 2195b5bc046SBarry Smith 220d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 221d71ae5a4SJacob Faibussowitsch { 222c9c72f8fSHong Zhang Mat C = B; 223c9c72f8fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 224c9c72f8fSHong Zhang IS isrow = b->row, isicol = b->icol; 225c9c72f8fSHong Zhang const PetscInt *r, *ic, *ics; 226bbd65245SShri Abhyankar const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 227bbd65245SShri Abhyankar PetscInt i, j, k, nz, nzL, row, *pj; 228bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp; 229bbd65245SShri Abhyankar MatScalar *rtmp, *pc, multiplier, *pv; 230b65878eeSJunchao Zhang const MatScalar *aa, *v; 231b65878eeSJunchao Zhang MatScalar *ba; 232ace3abfcSBarry Smith PetscBool row_identity, col_identity; 233d90ac83dSHong Zhang FactorShiftCtx sctx; 2344f81c4b7SBarry Smith const PetscInt *ddiag; 235d4ad06f7SHong Zhang PetscReal rs; 236d4ad06f7SHong Zhang MatScalar d; 237d4ad06f7SHong Zhang 238c9c72f8fSHong Zhang PetscFunctionBegin; 239b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 240b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayWrite(B, &ba)); 241d6e5152cSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 2429566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 243d4ad06f7SHong Zhang 244f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 245*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 246d4ad06f7SHong Zhang sctx.shift_top = info->zeropivot; 247d4ad06f7SHong Zhang for (i = 0; i < n; i++) { 248d4ad06f7SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 249*421480d9SBarry Smith d = aa[ddiag[i]]; 250d4ad06f7SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 251d4ad06f7SHong Zhang v = aa + ai[i]; 252d4ad06f7SHong Zhang nz = ai[i + 1] - ai[i]; 2532205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 254d4ad06f7SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 255d4ad06f7SHong Zhang } 256d4ad06f7SHong Zhang sctx.shift_top *= 1.1; 257d4ad06f7SHong Zhang sctx.nshift_max = 5; 258d4ad06f7SHong Zhang sctx.shift_lo = 0.; 259d4ad06f7SHong Zhang sctx.shift_hi = 1.; 260d4ad06f7SHong Zhang } 261d4ad06f7SHong Zhang 2629566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 2639566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 2649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 265c9c72f8fSHong Zhang ics = ic; 266c9c72f8fSHong Zhang 267d4ad06f7SHong Zhang do { 26807b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 269c9c72f8fSHong Zhang for (i = 0; i < n; i++) { 270c9c72f8fSHong Zhang /* zero rtmp */ 271c9c72f8fSHong Zhang /* L part */ 272c9c72f8fSHong Zhang nz = bi[i + 1] - bi[i]; 273c9c72f8fSHong Zhang bjtmp = bj + bi[i]; 274c9c72f8fSHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 275c9c72f8fSHong Zhang 276c9c72f8fSHong Zhang /* U part */ 27762fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 27862fbd6afSShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 279813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 280813a1f2bSShri Abhyankar 281813a1f2bSShri Abhyankar /* load in initial (unfactored row) */ 282813a1f2bSShri Abhyankar nz = ai[r[i] + 1] - ai[r[i]]; 283813a1f2bSShri Abhyankar ajtmp = aj + ai[r[i]]; 284813a1f2bSShri Abhyankar v = aa + ai[r[i]]; 285ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 286813a1f2bSShri Abhyankar /* ZeropivotApply() */ 28798a93161SHong Zhang rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 288813a1f2bSShri Abhyankar 289813a1f2bSShri Abhyankar /* elimination */ 290813a1f2bSShri Abhyankar bjtmp = bj + bi[i]; 291813a1f2bSShri Abhyankar row = *bjtmp++; 292813a1f2bSShri Abhyankar nzL = bi[i + 1] - bi[i]; 293f268cba8SShri Abhyankar for (k = 0; k < nzL; k++) { 294813a1f2bSShri Abhyankar pc = rtmp + row; 295813a1f2bSShri Abhyankar if (*pc != 0.0) { 296b65878eeSJunchao Zhang pv = ba + bdiag[row]; 297813a1f2bSShri Abhyankar multiplier = *pc * (*pv); 298813a1f2bSShri Abhyankar *pc = multiplier; 2992205254eSKarl Rupp 30062fbd6afSShri Abhyankar pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 301b65878eeSJunchao Zhang pv = ba + bdiag[row + 1] + 1; 30262fbd6afSShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 3032205254eSKarl Rupp 304813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 3059566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 306813a1f2bSShri Abhyankar } 307f268cba8SShri Abhyankar row = *bjtmp++; 308813a1f2bSShri Abhyankar } 309813a1f2bSShri Abhyankar 310813a1f2bSShri Abhyankar /* finished row so stick it into b->a */ 311813a1f2bSShri Abhyankar rs = 0.0; 312813a1f2bSShri Abhyankar /* L part */ 313b65878eeSJunchao Zhang pv = ba + bi[i]; 314813a1f2bSShri Abhyankar pj = b->j + bi[i]; 315813a1f2bSShri Abhyankar nz = bi[i + 1] - bi[i]; 316813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) { 3179371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 3189371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 319813a1f2bSShri Abhyankar } 320813a1f2bSShri Abhyankar 321813a1f2bSShri Abhyankar /* U part */ 322b65878eeSJunchao Zhang pv = ba + bdiag[i + 1] + 1; 32362fbd6afSShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 32462fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 325813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) { 3269371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 3279371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 328813a1f2bSShri Abhyankar } 329813a1f2bSShri Abhyankar 330813a1f2bSShri Abhyankar sctx.rs = rs; 331813a1f2bSShri Abhyankar sctx.pv = rtmp[i]; 3329566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 33307b50cabSHong Zhang if (sctx.newshift) break; /* break for-loop */ 33407b50cabSHong Zhang rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 335813a1f2bSShri Abhyankar 336a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 337b65878eeSJunchao Zhang pv = ba + bdiag[i]; 338813a1f2bSShri Abhyankar *pv = 1.0 / rtmp[i]; 339813a1f2bSShri Abhyankar 340813a1f2bSShri Abhyankar } /* endof for (i=0; i<n; i++) { */ 341813a1f2bSShri Abhyankar 3428ff23777SHong Zhang /* MatPivotRefine() */ 34307b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 344813a1f2bSShri Abhyankar /* 345813a1f2bSShri Abhyankar * if no shift in this attempt & shifting & started shifting & can refine, 346813a1f2bSShri Abhyankar * then try lower shift 347813a1f2bSShri Abhyankar */ 348813a1f2bSShri Abhyankar sctx.shift_hi = sctx.shift_fraction; 349813a1f2bSShri Abhyankar sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 350813a1f2bSShri Abhyankar sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 35107b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 352813a1f2bSShri Abhyankar sctx.nshift++; 353813a1f2bSShri Abhyankar } 35407b50cabSHong Zhang } while (sctx.newshift); 355813a1f2bSShri Abhyankar 356b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 357b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba)); 358b65878eeSJunchao Zhang 3599566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 3609566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 3619566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 362d3ac4fa3SBarry Smith 3639566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 3649566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 3654d12350bSJunchao Zhang if (b->inode.size_csr) { 366abb87a52SBarry Smith C->ops->solve = MatSolve_SeqAIJ_Inode; 367abb87a52SBarry Smith } else if (row_identity && col_identity) { 36835233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 369813a1f2bSShri Abhyankar } else { 37035233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ; 371813a1f2bSShri Abhyankar } 37235233d99SShri Abhyankar C->ops->solveadd = MatSolveAdd_SeqAIJ; 37335233d99SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 37435233d99SShri Abhyankar C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 37535233d99SShri Abhyankar C->ops->matsolve = MatMatSolve_SeqAIJ; 376a3d9026eSPierre Jolivet C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ; 377813a1f2bSShri Abhyankar C->assembled = PETSC_TRUE; 378813a1f2bSShri Abhyankar C->preallocated = PETSC_TRUE; 3792205254eSKarl Rupp 3809566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 381813a1f2bSShri Abhyankar 38214d2772eSHong Zhang /* MatShiftView(A,info,&sctx) */ 383813a1f2bSShri Abhyankar if (sctx.nshift) { 384f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 3859566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 386f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 3879566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 388f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 3899566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 390813a1f2bSShri Abhyankar } 391813a1f2bSShri Abhyankar } 3923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 393813a1f2bSShri Abhyankar } 394813a1f2bSShri Abhyankar 395ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 396ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 397ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 398ff6a9541SJacob Faibussowitsch 399d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 400d71ae5a4SJacob Faibussowitsch { 401719d5645SBarry Smith Mat C = B; 402416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 40382bf6240SBarry Smith IS isrow = b->row, isicol = b->icol; 4045d0c19d7SBarry Smith const PetscInt *r, *ic, *ics; 405d278edc7SBarry Smith PetscInt nz, row, i, j, n = A->rmap->n, diag; 406d278edc7SBarry Smith const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 407*421480d9SBarry Smith const PetscInt *ajtmp, *bjtmp, *ddiag, *pj; 408d278edc7SBarry Smith MatScalar *pv, *rtmp, *pc, multiplier, d; 409b65878eeSJunchao Zhang const MatScalar *v, *aa; 410b65878eeSJunchao Zhang MatScalar *ba; 41133f9893dSHong Zhang PetscReal rs = 0.0; 412d90ac83dSHong Zhang FactorShiftCtx sctx; 413ace3abfcSBarry Smith PetscBool row_identity, col_identity; 414289bc588SBarry Smith 4153a40ed3dSBarry Smith PetscFunctionBegin; 416*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 417*421480d9SBarry Smith 418b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 419b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayWrite(B, &ba)); 420504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 4219566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 422ada7143aSSatish Balay 423f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 424*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 4259f7d0b68SHong Zhang sctx.shift_top = info->zeropivot; 4266cc28720Svictorle for (i = 0; i < n; i++) { 4279f95998fSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 428*421480d9SBarry Smith d = aa[ddiag[i]]; 4291a890ab1SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 4309f7d0b68SHong Zhang v = aa + ai[i]; 4319f7d0b68SHong Zhang nz = ai[i + 1] - ai[i]; 4322205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 4331a890ab1SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 4346cc28720Svictorle } 435118f5789SBarry Smith sctx.shift_top *= 1.1; 436d2276718SHong Zhang sctx.nshift_max = 5; 437d2276718SHong Zhang sctx.shift_lo = 0.; 438d2276718SHong Zhang sctx.shift_hi = 1.; 439a0a356efSHong Zhang } 440a0a356efSHong Zhang 4419566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 4429566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 4439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 444504a3fadSHong Zhang ics = ic; 445504a3fadSHong Zhang 446085256b3SBarry Smith do { 44707b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 448289bc588SBarry Smith for (i = 0; i < n; i++) { 449b3bf805bSHong Zhang nz = bi[i + 1] - bi[i]; 450b3bf805bSHong Zhang bjtmp = bj + bi[i]; 4519f7d0b68SHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 452289bc588SBarry Smith 453289bc588SBarry Smith /* load in initial (unfactored row) */ 4549f7d0b68SHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 4559f7d0b68SHong Zhang ajtmp = aj + ai[r[i]]; 4569f7d0b68SHong Zhang v = aa + ai[r[i]]; 457ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 458d2276718SHong Zhang rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 459289bc588SBarry Smith 460b3bf805bSHong Zhang row = *bjtmp++; 461f2582111SSatish Balay while (row < i) { 4628c37ef55SBarry Smith pc = rtmp + row; 4638c37ef55SBarry Smith if (*pc != 0.0) { 464*421480d9SBarry Smith pv = ba + ddiag[row]; 465*421480d9SBarry Smith pj = b->j + ddiag[row] + 1; 46635aab85fSBarry Smith multiplier = *pc / *pv++; 4678c37ef55SBarry Smith *pc = multiplier; 468*421480d9SBarry Smith nz = bi[row + 1] - ddiag[row] - 1; 4699f7d0b68SHong Zhang for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 4709566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 471289bc588SBarry Smith } 472b3bf805bSHong Zhang row = *bjtmp++; 473289bc588SBarry Smith } 474416022c9SBarry Smith /* finished row so stick it into b->a */ 475b65878eeSJunchao Zhang pv = ba + bi[i]; 476b3bf805bSHong Zhang pj = b->j + bi[i]; 477b3bf805bSHong Zhang nz = bi[i + 1] - bi[i]; 478*421480d9SBarry Smith diag = ddiag[i] - bi[i]; 479e57ff13aSBarry Smith rs = 0.0; 480d3d32019SBarry Smith for (j = 0; j < nz; j++) { 4819f7d0b68SHong Zhang pv[j] = rtmp[pj[j]]; 4829f7d0b68SHong Zhang rs += PetscAbsScalar(pv[j]); 483d3d32019SBarry Smith } 484e57ff13aSBarry Smith rs -= PetscAbsScalar(pv[diag]); 485d2276718SHong Zhang 486d2276718SHong Zhang sctx.rs = rs; 487d2276718SHong Zhang sctx.pv = pv[diag]; 4889566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 48907b50cabSHong Zhang if (sctx.newshift) break; 490504a3fadSHong Zhang pv[diag] = sctx.pv; 49135aab85fSBarry Smith } 492d2276718SHong Zhang 49307b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 4946cc28720Svictorle /* 4956c037d1bSvictorle * if no shift in this attempt & shifting & started shifting & can refine, 4966cc28720Svictorle * then try lower shift 4976cc28720Svictorle */ 4980481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 4990481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 5000481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 50107b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 502d2276718SHong Zhang sctx.nshift++; 5036cc28720Svictorle } 50407b50cabSHong Zhang } while (sctx.newshift); 5050f11f9aeSSatish Balay 506a5b23f4aSJose E. Roman /* invert diagonal entries for simpler triangular solves */ 507*421480d9SBarry Smith for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]]; 5089566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 5099566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 5109566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 511b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 512b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba)); 513d3ac4fa3SBarry Smith 5149566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 5159566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 5168d8c7c43SBarry Smith if (row_identity && col_identity) { 517ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 5188d8c7c43SBarry Smith } else { 519ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_inplace; 520db4efbfdSBarry Smith } 521ad04f41aSHong Zhang C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 522ad04f41aSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 523ad04f41aSHong Zhang C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 524ad04f41aSHong Zhang C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 525a3d9026eSPierre Jolivet C->ops->matsolvetranspose = NULL; 5262205254eSKarl Rupp 527c456f294SBarry Smith C->assembled = PETSC_TRUE; 5285c9eb25fSBarry Smith C->preallocated = PETSC_TRUE; 5292205254eSKarl Rupp 5309566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 531d2276718SHong Zhang if (sctx.nshift) { 532f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 5339566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 534f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 5359566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 5366cc28720Svictorle } 5370697b6caSHong Zhang } 53857508eceSPierre Jolivet C->ops->solve = MatSolve_SeqAIJ_inplace; 53957508eceSPierre Jolivet C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 5402205254eSKarl Rupp 5419566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode(C)); 5423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 543289bc588SBarry Smith } 5440889b5dcSvictorle 545ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 546ff6a9541SJacob Faibussowitsch 547137fb511SHong Zhang /* 548137fb511SHong Zhang This routine implements inplace ILU(0) with row or/and column permutations. 549137fb511SHong Zhang Input: 550137fb511SHong Zhang A - original matrix 551137fb511SHong Zhang Output; 552137fb511SHong Zhang A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 553137fb511SHong Zhang a->j (col index) is permuted by the inverse of colperm, then sorted 554137fb511SHong Zhang a->a reordered accordingly with a->j 555137fb511SHong Zhang a->diag (ptr to diagonal elements) is updated. 556137fb511SHong Zhang */ 557d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 558d71ae5a4SJacob Faibussowitsch { 559b051339dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 560b051339dSHong Zhang IS isrow = a->row, isicol = a->icol; 5615d0c19d7SBarry Smith const PetscInt *r, *ic, *ics; 5625d0c19d7SBarry Smith PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 5635d0c19d7SBarry Smith PetscInt *ajtmp, nz, row; 564*421480d9SBarry Smith PetscInt nbdiag, *pj; 565a77337e4SBarry Smith PetscScalar *rtmp, *pc, multiplier, d; 566504a3fadSHong Zhang MatScalar *pv, *v; 567137fb511SHong Zhang PetscReal rs; 568d90ac83dSHong Zhang FactorShiftCtx sctx; 569b65878eeSJunchao Zhang MatScalar *aa, *vtmp; 570*421480d9SBarry Smith PetscInt *diag; 571137fb511SHong Zhang 572137fb511SHong Zhang PetscFunctionBegin; 57308401ef6SPierre Jolivet PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 574504a3fadSHong Zhang 575*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL)); 576b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArray(A, &aa)); 577504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 5789566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 579504a3fadSHong Zhang 580504a3fadSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 581*421480d9SBarry Smith const PetscInt *ddiag; 582*421480d9SBarry Smith 583*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 584504a3fadSHong Zhang sctx.shift_top = info->zeropivot; 585504a3fadSHong Zhang for (i = 0; i < n; i++) { 586504a3fadSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 587*421480d9SBarry Smith d = aa[ddiag[i]]; 588504a3fadSHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 589504a3fadSHong Zhang vtmp = aa + ai[i]; 590504a3fadSHong Zhang nz = ai[i + 1] - ai[i]; 5912205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 592504a3fadSHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 593504a3fadSHong Zhang } 594504a3fadSHong Zhang sctx.shift_top *= 1.1; 595504a3fadSHong Zhang sctx.nshift_max = 5; 596504a3fadSHong Zhang sctx.shift_lo = 0.; 597504a3fadSHong Zhang sctx.shift_hi = 1.; 598504a3fadSHong Zhang } 599504a3fadSHong Zhang 6009566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 6019566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 6029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 6039566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, n + 1)); 604b051339dSHong Zhang ics = ic; 605137fb511SHong Zhang 606504a3fadSHong Zhang #if defined(MV) 60775567043SBarry Smith sctx.shift_top = 0.; 608e60cf9a0SBarry Smith sctx.nshift_max = 0; 60975567043SBarry Smith sctx.shift_lo = 0.; 61075567043SBarry Smith sctx.shift_hi = 0.; 61175567043SBarry Smith sctx.shift_fraction = 0.; 612137fb511SHong Zhang 613f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 61475567043SBarry Smith sctx.shift_top = 0.; 615137fb511SHong Zhang for (i = 0; i < n; i++) { 616137fb511SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 617b65878eeSJunchao Zhang d = aa[diag[i]]; 618137fb511SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 619b65878eeSJunchao Zhang v = aa + ai[i]; 620b051339dSHong Zhang nz = ai[i + 1] - ai[i]; 6212205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 622137fb511SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 623137fb511SHong Zhang } 624137fb511SHong Zhang if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 625137fb511SHong Zhang sctx.shift_top *= 1.1; 626137fb511SHong Zhang sctx.nshift_max = 5; 627137fb511SHong Zhang sctx.shift_lo = 0.; 628137fb511SHong Zhang sctx.shift_hi = 1.; 629137fb511SHong Zhang } 630137fb511SHong Zhang 63175567043SBarry Smith sctx.shift_amount = 0.; 632137fb511SHong Zhang sctx.nshift = 0; 633504a3fadSHong Zhang #endif 634504a3fadSHong Zhang 635137fb511SHong Zhang do { 63607b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 637137fb511SHong Zhang for (i = 0; i < n; i++) { 638b051339dSHong Zhang /* load in initial unfactored row */ 639b051339dSHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 640b051339dSHong Zhang ajtmp = aj + ai[r[i]]; 641b65878eeSJunchao Zhang v = aa + ai[r[i]]; 642137fb511SHong Zhang /* sort permuted ajtmp and values v accordingly */ 643b051339dSHong Zhang for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 6449566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 645137fb511SHong Zhang 646b051339dSHong Zhang diag[r[i]] = ai[r[i]]; 647137fb511SHong Zhang for (j = 0; j < nz; j++) { 648137fb511SHong Zhang rtmp[ajtmp[j]] = v[j]; 649b051339dSHong Zhang if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 650137fb511SHong Zhang } 651137fb511SHong Zhang rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 652137fb511SHong Zhang 653b051339dSHong Zhang row = *ajtmp++; 654137fb511SHong Zhang while (row < i) { 655137fb511SHong Zhang pc = rtmp + row; 656137fb511SHong Zhang if (*pc != 0.0) { 657b65878eeSJunchao Zhang pv = aa + diag[r[row]]; 658b051339dSHong Zhang pj = aj + diag[r[row]] + 1; 659137fb511SHong Zhang 660137fb511SHong Zhang multiplier = *pc / *pv++; 661137fb511SHong Zhang *pc = multiplier; 662b051339dSHong Zhang nz = ai[r[row] + 1] - diag[r[row]] - 1; 663b051339dSHong Zhang for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 6649566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 665137fb511SHong Zhang } 666b051339dSHong Zhang row = *ajtmp++; 667137fb511SHong Zhang } 668b65878eeSJunchao Zhang /* finished row so overwrite it onto aa */ 669b65878eeSJunchao Zhang pv = aa + ai[r[i]]; 670b051339dSHong Zhang pj = aj + ai[r[i]]; 671b051339dSHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 672b051339dSHong Zhang nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 673137fb511SHong Zhang 674137fb511SHong Zhang rs = 0.0; 675137fb511SHong Zhang for (j = 0; j < nz; j++) { 676b051339dSHong Zhang pv[j] = rtmp[pj[j]]; 677b051339dSHong Zhang if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 678137fb511SHong Zhang } 679137fb511SHong Zhang 680137fb511SHong Zhang sctx.rs = rs; 681b051339dSHong Zhang sctx.pv = pv[nbdiag]; 6829566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 68307b50cabSHong Zhang if (sctx.newshift) break; 684504a3fadSHong Zhang pv[nbdiag] = sctx.pv; 685137fb511SHong Zhang } 686137fb511SHong Zhang 68707b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 688137fb511SHong Zhang /* 689137fb511SHong Zhang * if no shift in this attempt & shifting & started shifting & can refine, 690137fb511SHong Zhang * then try lower shift 691137fb511SHong Zhang */ 6920481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 6930481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 6940481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 69507b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 696137fb511SHong Zhang sctx.nshift++; 697137fb511SHong Zhang } 69807b50cabSHong Zhang } while (sctx.newshift); 699137fb511SHong Zhang 700a5b23f4aSJose E. Roman /* invert diagonal entries for simpler triangular solves */ 701b65878eeSJunchao Zhang for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]]; 702137fb511SHong Zhang 703b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArray(A, &aa)); 7049566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 7059566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 7069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 7072205254eSKarl Rupp 708b051339dSHong Zhang A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 709ad04f41aSHong Zhang A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 710ad04f41aSHong Zhang A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 711ad04f41aSHong Zhang A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 7122205254eSKarl Rupp 713b051339dSHong Zhang A->assembled = PETSC_TRUE; 7145c9eb25fSBarry Smith A->preallocated = PETSC_TRUE; 7152205254eSKarl Rupp 7169566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(A->cmap->n)); 717137fb511SHong Zhang if (sctx.nshift) { 718f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 7199566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 720f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 7219566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 722137fb511SHong Zhang } 723137fb511SHong Zhang } 7243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 725137fb511SHong Zhang } 726137fb511SHong Zhang 727d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 728d71ae5a4SJacob Faibussowitsch { 729416022c9SBarry Smith Mat C; 730416022c9SBarry Smith 7313a40ed3dSBarry Smith PetscFunctionBegin; 7329566063dSJacob Faibussowitsch PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 7339566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 7349566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(C, A, info)); 7352205254eSKarl Rupp 736db4efbfdSBarry Smith A->ops->solve = C->ops->solve; 737db4efbfdSBarry Smith A->ops->solvetranspose = C->ops->solvetranspose; 7382205254eSKarl Rupp 7399566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 7403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 741da3a660dSBarry Smith } 7421d098868SHong Zhang 743d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 744d71ae5a4SJacob Faibussowitsch { 745416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 746416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 7475d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 7485d0c19d7SBarry Smith PetscInt nz; 7495d0c19d7SBarry Smith const PetscInt *rout, *cout, *r, *c; 750dd6ea824SBarry Smith PetscScalar *x, *tmp, *tmps, sum; 751d9fead3dSBarry Smith const PetscScalar *b; 752b65878eeSJunchao Zhang const MatScalar *aa, *v; 753*421480d9SBarry Smith const PetscInt *adiag; 7548c37ef55SBarry Smith 7553a40ed3dSBarry Smith PetscFunctionBegin; 7563ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 75791bf9adeSBarry Smith 758*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 759b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 7609566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 7619566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 762416022c9SBarry Smith tmp = a->solve_work; 7638c37ef55SBarry Smith 7649371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 7659371c9d4SSatish Balay r = rout; 7669371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 7679371c9d4SSatish Balay c = cout + (n - 1); 7688c37ef55SBarry Smith 7698c37ef55SBarry Smith /* forward solve the lower triangular */ 7708c37ef55SBarry Smith tmp[0] = b[*r++]; 771010a20caSHong Zhang tmps = tmp; 7728c37ef55SBarry Smith for (i = 1; i < n; i++) { 773010a20caSHong Zhang v = aa + ai[i]; 774010a20caSHong Zhang vi = aj + ai[i]; 775*421480d9SBarry Smith nz = adiag[i] - ai[i]; 77653e7425aSBarry Smith sum = b[*r++]; 777003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 7788c37ef55SBarry Smith tmp[i] = sum; 7798c37ef55SBarry Smith } 7808c37ef55SBarry Smith 7818c37ef55SBarry Smith /* backward solve the upper triangular */ 7828c37ef55SBarry Smith for (i = n - 1; i >= 0; i--) { 783*421480d9SBarry Smith v = aa + adiag[i] + 1; 784*421480d9SBarry Smith vi = aj + adiag[i] + 1; 785*421480d9SBarry Smith nz = ai[i + 1] - adiag[i] - 1; 7868c37ef55SBarry Smith sum = tmp[i]; 787003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 788*421480d9SBarry Smith x[*c--] = tmp[i] = sum * aa[adiag[i]]; 7898c37ef55SBarry Smith } 790b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 7919566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 7929566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 7939566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 7949566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 7959566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 7963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7978c37ef55SBarry Smith } 798026e39d0SSatish Balay 799ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 800d71ae5a4SJacob Faibussowitsch { 8012bebee5dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8022bebee5dSHong Zhang IS iscol = a->col, isrow = a->row; 8035d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 804910cf402Sprj- PetscInt nz, neq, ldb, ldx; 8055d0c19d7SBarry Smith const PetscInt *rout, *cout, *r, *c; 806910cf402Sprj- PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 807b65878eeSJunchao Zhang const PetscScalar *b, *aa, *v; 808910cf402Sprj- PetscBool isdense; 809*421480d9SBarry Smith const PetscInt *adiag; 8102bebee5dSHong Zhang 8112bebee5dSHong Zhang PetscFunctionBegin; 8123ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 8139566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 81428b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 815f9fb9879SHong Zhang if (X != B) { 8169566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 81728b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 818f9fb9879SHong Zhang } 819*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 820b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 8219566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 8229566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &ldb)); 8239566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 8249566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(X, &ldx)); 8259371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 8269371c9d4SSatish Balay r = rout; 8279371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 8289371c9d4SSatish Balay c = cout; 829a36b22ccSSatish Balay for (neq = 0; neq < B->cmap->n; neq++) { 8302bebee5dSHong Zhang /* forward solve the lower triangular */ 8312bebee5dSHong Zhang tmp[0] = b[r[0]]; 8322bebee5dSHong Zhang tmps = tmp; 8332bebee5dSHong Zhang for (i = 1; i < n; i++) { 8342bebee5dSHong Zhang v = aa + ai[i]; 8352bebee5dSHong Zhang vi = aj + ai[i]; 836*421480d9SBarry Smith nz = adiag[i] - ai[i]; 8372bebee5dSHong Zhang sum = b[r[i]]; 838003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 8392bebee5dSHong Zhang tmp[i] = sum; 8402bebee5dSHong Zhang } 8412bebee5dSHong Zhang /* backward solve the upper triangular */ 8422bebee5dSHong Zhang for (i = n - 1; i >= 0; i--) { 843*421480d9SBarry Smith v = aa + adiag[i] + 1; 844*421480d9SBarry Smith vi = aj + adiag[i] + 1; 845*421480d9SBarry Smith nz = ai[i + 1] - adiag[i] - 1; 8462bebee5dSHong Zhang sum = tmp[i]; 847003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 848*421480d9SBarry Smith x[c[i]] = tmp[i] = sum * aa[adiag[i]]; 8492bebee5dSHong Zhang } 850910cf402Sprj- b += ldb; 851910cf402Sprj- x += ldx; 8522bebee5dSHong Zhang } 853b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 8549566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 8559566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 8569566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 8579566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 8589566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 8593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8602bebee5dSHong Zhang } 8612bebee5dSHong Zhang 862d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 863d71ae5a4SJacob Faibussowitsch { 8649bd0847aSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8659bd0847aSShri Abhyankar IS iscol = a->col, isrow = a->row; 866*421480d9SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 867*421480d9SBarry Smith const PetscInt *adiag; 868910cf402Sprj- PetscInt nz, neq, ldb, ldx; 8699bd0847aSShri Abhyankar const PetscInt *rout, *cout, *r, *c; 870910cf402Sprj- PetscScalar *x, *tmp = a->solve_work, sum; 871b65878eeSJunchao Zhang const PetscScalar *b, *aa, *v; 872910cf402Sprj- PetscBool isdense; 8739bd0847aSShri Abhyankar 8749bd0847aSShri Abhyankar PetscFunctionBegin; 8753ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 8769566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 87728b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 878f9fb9879SHong Zhang if (X != B) { 8799566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 88028b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 881f9fb9879SHong Zhang } 882*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 883b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 8849566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 8859566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &ldb)); 8869566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 8879566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(X, &ldx)); 8889371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 8899371c9d4SSatish Balay r = rout; 8909371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 8919371c9d4SSatish Balay c = cout; 8929bd0847aSShri Abhyankar for (neq = 0; neq < B->cmap->n; neq++) { 8939bd0847aSShri Abhyankar /* forward solve the lower triangular */ 8949bd0847aSShri Abhyankar tmp[0] = b[r[0]]; 8959bd0847aSShri Abhyankar v = aa; 8969bd0847aSShri Abhyankar vi = aj; 8979bd0847aSShri Abhyankar for (i = 1; i < n; i++) { 8989bd0847aSShri Abhyankar nz = ai[i + 1] - ai[i]; 8999bd0847aSShri Abhyankar sum = b[r[i]]; 9009bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 9019bd0847aSShri Abhyankar tmp[i] = sum; 9029371c9d4SSatish Balay v += nz; 9039371c9d4SSatish Balay vi += nz; 9049bd0847aSShri Abhyankar } 9059bd0847aSShri Abhyankar /* backward solve the upper triangular */ 9069bd0847aSShri Abhyankar for (i = n - 1; i >= 0; i--) { 9079bd0847aSShri Abhyankar v = aa + adiag[i + 1] + 1; 9089bd0847aSShri Abhyankar vi = aj + adiag[i + 1] + 1; 9099bd0847aSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 9109bd0847aSShri Abhyankar sum = tmp[i]; 9119bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 9129bd0847aSShri Abhyankar x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 9139bd0847aSShri Abhyankar } 914910cf402Sprj- b += ldb; 915910cf402Sprj- x += ldx; 9169bd0847aSShri Abhyankar } 917b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 9189566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 9199566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 9209566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 9219566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 9229566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 9233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9249bd0847aSShri Abhyankar } 9259bd0847aSShri Abhyankar 926a3d9026eSPierre Jolivet PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X) 927a3d9026eSPierre Jolivet { 928a3d9026eSPierre Jolivet Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 929a3d9026eSPierre Jolivet IS iscol = a->col, isrow = a->row; 930*421480d9SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j; 931*421480d9SBarry Smith const PetscInt *adiag = a->diag; 932a3d9026eSPierre Jolivet PetscInt nz, neq, ldb, ldx; 933a3d9026eSPierre Jolivet const PetscInt *rout, *cout, *r, *c; 934a3d9026eSPierre Jolivet PetscScalar *x, *tmp = a->solve_work, s1; 935b65878eeSJunchao Zhang const PetscScalar *b, *aa, *v; 936a3d9026eSPierre Jolivet PetscBool isdense; 937a3d9026eSPierre Jolivet 938a3d9026eSPierre Jolivet PetscFunctionBegin; 939a3d9026eSPierre Jolivet if (!n) PetscFunctionReturn(PETSC_SUCCESS); 940a3d9026eSPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 941a3d9026eSPierre Jolivet PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 942a3d9026eSPierre Jolivet if (X != B) { 943a3d9026eSPierre Jolivet PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 944a3d9026eSPierre Jolivet PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 945a3d9026eSPierre Jolivet } 946*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 947b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 948a3d9026eSPierre Jolivet PetscCall(MatDenseGetArrayRead(B, &b)); 949a3d9026eSPierre Jolivet PetscCall(MatDenseGetLDA(B, &ldb)); 950a3d9026eSPierre Jolivet PetscCall(MatDenseGetArray(X, &x)); 951a3d9026eSPierre Jolivet PetscCall(MatDenseGetLDA(X, &ldx)); 952a3d9026eSPierre Jolivet PetscCall(ISGetIndices(isrow, &rout)); 953a3d9026eSPierre Jolivet r = rout; 954a3d9026eSPierre Jolivet PetscCall(ISGetIndices(iscol, &cout)); 955a3d9026eSPierre Jolivet c = cout; 956a3d9026eSPierre Jolivet for (neq = 0; neq < B->cmap->n; neq++) { 957a3d9026eSPierre Jolivet /* copy the b into temp work space according to permutation */ 958a3d9026eSPierre Jolivet for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 959a3d9026eSPierre Jolivet 960a3d9026eSPierre Jolivet /* forward solve the U^T */ 961a3d9026eSPierre Jolivet for (i = 0; i < n; i++) { 962a3d9026eSPierre Jolivet v = aa + adiag[i + 1] + 1; 963a3d9026eSPierre Jolivet vi = aj + adiag[i + 1] + 1; 964a3d9026eSPierre Jolivet nz = adiag[i] - adiag[i + 1] - 1; 965a3d9026eSPierre Jolivet s1 = tmp[i]; 966a3d9026eSPierre Jolivet s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 967a3d9026eSPierre Jolivet for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 968a3d9026eSPierre Jolivet tmp[i] = s1; 969a3d9026eSPierre Jolivet } 970a3d9026eSPierre Jolivet 971a3d9026eSPierre Jolivet /* backward solve the L^T */ 972a3d9026eSPierre Jolivet for (i = n - 1; i >= 0; i--) { 973a3d9026eSPierre Jolivet v = aa + ai[i]; 974a3d9026eSPierre Jolivet vi = aj + ai[i]; 975a3d9026eSPierre Jolivet nz = ai[i + 1] - ai[i]; 976a3d9026eSPierre Jolivet s1 = tmp[i]; 977a3d9026eSPierre Jolivet for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 978a3d9026eSPierre Jolivet } 979a3d9026eSPierre Jolivet 980a3d9026eSPierre Jolivet /* copy tmp into x according to permutation */ 981a3d9026eSPierre Jolivet for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 982a3d9026eSPierre Jolivet b += ldb; 983a3d9026eSPierre Jolivet x += ldx; 984a3d9026eSPierre Jolivet } 985b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 986a3d9026eSPierre Jolivet PetscCall(ISRestoreIndices(isrow, &rout)); 987a3d9026eSPierre Jolivet PetscCall(ISRestoreIndices(iscol, &cout)); 988a3d9026eSPierre Jolivet PetscCall(MatDenseRestoreArrayRead(B, &b)); 989a3d9026eSPierre Jolivet PetscCall(MatDenseRestoreArray(X, &x)); 990a3d9026eSPierre Jolivet PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 991a3d9026eSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 992a3d9026eSPierre Jolivet } 993a3d9026eSPierre Jolivet 994ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 995d71ae5a4SJacob Faibussowitsch { 996137fb511SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 997137fb511SHong Zhang IS iscol = a->col, isrow = a->row; 998*421480d9SBarry Smith const PetscInt *r, *c, *rout, *cout, *adiag; 9995d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 10005d0c19d7SBarry Smith PetscInt nz, row; 1001fdc842d1SBarry Smith PetscScalar *x, *tmp, *tmps, sum; 1002fdc842d1SBarry Smith const PetscScalar *b; 1003b65878eeSJunchao Zhang const MatScalar *aa, *v; 1004137fb511SHong Zhang 1005137fb511SHong Zhang PetscFunctionBegin; 10063ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1007137fb511SHong Zhang 1008*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1009b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 10109566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 10119566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1012137fb511SHong Zhang tmp = a->solve_work; 1013137fb511SHong Zhang 10149371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 10159371c9d4SSatish Balay r = rout; 10169371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 10179371c9d4SSatish Balay c = cout + (n - 1); 1018137fb511SHong Zhang 1019137fb511SHong Zhang /* forward solve the lower triangular */ 1020137fb511SHong Zhang tmp[0] = b[*r++]; 1021137fb511SHong Zhang tmps = tmp; 1022137fb511SHong Zhang for (row = 1; row < n; row++) { 1023137fb511SHong Zhang i = rout[row]; /* permuted row */ 1024137fb511SHong Zhang v = aa + ai[i]; 1025137fb511SHong Zhang vi = aj + ai[i]; 1026*421480d9SBarry Smith nz = adiag[i] - ai[i]; 1027137fb511SHong Zhang sum = b[*r++]; 1028003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1029137fb511SHong Zhang tmp[row] = sum; 1030137fb511SHong Zhang } 1031137fb511SHong Zhang 1032137fb511SHong Zhang /* backward solve the upper triangular */ 1033137fb511SHong Zhang for (row = n - 1; row >= 0; row--) { 1034137fb511SHong Zhang i = rout[row]; /* permuted row */ 1035*421480d9SBarry Smith v = aa + adiag[i] + 1; 1036*421480d9SBarry Smith vi = aj + adiag[i] + 1; 1037*421480d9SBarry Smith nz = ai[i + 1] - adiag[i] - 1; 1038137fb511SHong Zhang sum = tmp[row]; 1039003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1040*421480d9SBarry Smith x[*c--] = tmp[row] = sum * aa[adiag[i]]; 1041137fb511SHong Zhang } 1042b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 10439566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 10449566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 10459566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 10469566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 10479566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 10483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1049137fb511SHong Zhang } 1050137fb511SHong Zhang 1051c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1052ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1053d71ae5a4SJacob Faibussowitsch { 1054930ae53cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1055d0f46423SBarry Smith PetscInt n = A->rmap->n; 1056*421480d9SBarry Smith const PetscInt *ai = a->i, *aj = a->j, *adiag; 1057356650c2SBarry Smith PetscScalar *x; 1058356650c2SBarry Smith const PetscScalar *b; 1059b65878eeSJunchao Zhang const MatScalar *aa; 1060aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1061356650c2SBarry Smith PetscInt adiag_i, i, nz, ai_i; 1062b377110cSBarry Smith const PetscInt *vi; 1063dd6ea824SBarry Smith const MatScalar *v; 1064dd6ea824SBarry Smith PetscScalar sum; 1065d85afc42SSatish Balay #endif 1066930ae53cSBarry Smith 10673a40ed3dSBarry Smith PetscFunctionBegin; 10683ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1069*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1070b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 10719566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 10729566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1073930ae53cSBarry Smith 1074aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 10751c4feecaSSatish Balay fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 10761c4feecaSSatish Balay #else 1077930ae53cSBarry Smith /* forward solve the lower triangular */ 1078930ae53cSBarry Smith x[0] = b[0]; 1079930ae53cSBarry Smith for (i = 1; i < n; i++) { 1080930ae53cSBarry Smith ai_i = ai[i]; 1081930ae53cSBarry Smith v = aa + ai_i; 1082930ae53cSBarry Smith vi = aj + ai_i; 1083930ae53cSBarry Smith nz = adiag[i] - ai_i; 1084930ae53cSBarry Smith sum = b[i]; 1085003131ecSBarry Smith PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1086930ae53cSBarry Smith x[i] = sum; 1087930ae53cSBarry Smith } 1088930ae53cSBarry Smith 1089930ae53cSBarry Smith /* backward solve the upper triangular */ 1090930ae53cSBarry Smith for (i = n - 1; i >= 0; i--) { 1091930ae53cSBarry Smith adiag_i = adiag[i]; 1092d708749eSBarry Smith v = aa + adiag_i + 1; 1093d708749eSBarry Smith vi = aj + adiag_i + 1; 1094930ae53cSBarry Smith nz = ai[i + 1] - adiag_i - 1; 1095930ae53cSBarry Smith sum = x[i]; 1096003131ecSBarry Smith PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1097930ae53cSBarry Smith x[i] = sum * aa[adiag_i]; 1098930ae53cSBarry Smith } 10991c4feecaSSatish Balay #endif 11009566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1101b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 11029566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 11039566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 11043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1105930ae53cSBarry Smith } 1106930ae53cSBarry Smith 1107ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1108d71ae5a4SJacob Faibussowitsch { 1109416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1110416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 111104fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 11125d0c19d7SBarry Smith PetscInt nz; 1113*421480d9SBarry Smith const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag; 111404fbf559SBarry Smith PetscScalar *x, *tmp, sum; 111504fbf559SBarry Smith const PetscScalar *b; 1116b65878eeSJunchao Zhang const MatScalar *aa, *v; 1117da3a660dSBarry Smith 11183a40ed3dSBarry Smith PetscFunctionBegin; 11199566063dSJacob Faibussowitsch if (yy != xx) PetscCall(VecCopy(yy, xx)); 1120da3a660dSBarry Smith 1121*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1122b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 11239566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 11249566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1125416022c9SBarry Smith tmp = a->solve_work; 1126da3a660dSBarry Smith 11279371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 11289371c9d4SSatish Balay r = rout; 11299371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 11309371c9d4SSatish Balay c = cout + (n - 1); 1131da3a660dSBarry Smith 1132da3a660dSBarry Smith /* forward solve the lower triangular */ 1133da3a660dSBarry Smith tmp[0] = b[*r++]; 1134da3a660dSBarry Smith for (i = 1; i < n; i++) { 1135010a20caSHong Zhang v = aa + ai[i]; 1136010a20caSHong Zhang vi = aj + ai[i]; 1137*421480d9SBarry Smith nz = adiag[i] - ai[i]; 1138da3a660dSBarry Smith sum = b[*r++]; 113904fbf559SBarry Smith for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1140da3a660dSBarry Smith tmp[i] = sum; 1141da3a660dSBarry Smith } 1142da3a660dSBarry Smith 1143da3a660dSBarry Smith /* backward solve the upper triangular */ 1144da3a660dSBarry Smith for (i = n - 1; i >= 0; i--) { 1145*421480d9SBarry Smith v = aa + adiag[i] + 1; 1146*421480d9SBarry Smith vi = aj + adiag[i] + 1; 1147*421480d9SBarry Smith nz = ai[i + 1] - adiag[i] - 1; 1148da3a660dSBarry Smith sum = tmp[i]; 114904fbf559SBarry Smith for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1150*421480d9SBarry Smith tmp[i] = sum * aa[adiag[i]]; 1151da3a660dSBarry Smith x[*c--] += tmp[i]; 1152da3a660dSBarry Smith } 1153da3a660dSBarry Smith 1154b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 11559566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 11569566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 11579566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 11589566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 11599566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 11603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1161da3a660dSBarry Smith } 116204fbf559SBarry Smith 1163d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1164d71ae5a4SJacob Faibussowitsch { 11653c0119dfSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 11663c0119dfSShri Abhyankar IS iscol = a->col, isrow = a->row; 11673c0119dfSShri Abhyankar PetscInt i, n = A->rmap->n, j; 11683c0119dfSShri Abhyankar PetscInt nz; 1169*421480d9SBarry Smith const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag; 11703c0119dfSShri Abhyankar PetscScalar *x, *tmp, sum; 11713c0119dfSShri Abhyankar const PetscScalar *b; 1172b65878eeSJunchao Zhang const MatScalar *aa, *v; 11733c0119dfSShri Abhyankar 11743c0119dfSShri Abhyankar PetscFunctionBegin; 11759566063dSJacob Faibussowitsch if (yy != xx) PetscCall(VecCopy(yy, xx)); 11763c0119dfSShri Abhyankar 1177*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1178b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 11799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 11809566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 11813c0119dfSShri Abhyankar tmp = a->solve_work; 11823c0119dfSShri Abhyankar 11839371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 11849371c9d4SSatish Balay r = rout; 11859371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 11869371c9d4SSatish Balay c = cout; 11873c0119dfSShri Abhyankar 11883c0119dfSShri Abhyankar /* forward solve the lower triangular */ 11893c0119dfSShri Abhyankar tmp[0] = b[r[0]]; 11903c0119dfSShri Abhyankar v = aa; 11913c0119dfSShri Abhyankar vi = aj; 11923c0119dfSShri Abhyankar for (i = 1; i < n; i++) { 11933c0119dfSShri Abhyankar nz = ai[i + 1] - ai[i]; 11943c0119dfSShri Abhyankar sum = b[r[i]]; 11953c0119dfSShri Abhyankar for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 11963c0119dfSShri Abhyankar tmp[i] = sum; 11972205254eSKarl Rupp v += nz; 11982205254eSKarl Rupp vi += nz; 11993c0119dfSShri Abhyankar } 12003c0119dfSShri Abhyankar 12013c0119dfSShri Abhyankar /* backward solve the upper triangular */ 12023c0119dfSShri Abhyankar v = aa + adiag[n - 1]; 12033c0119dfSShri Abhyankar vi = aj + adiag[n - 1]; 12043c0119dfSShri Abhyankar for (i = n - 1; i >= 0; i--) { 12053c0119dfSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 12063c0119dfSShri Abhyankar sum = tmp[i]; 12073c0119dfSShri Abhyankar for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 12083c0119dfSShri Abhyankar tmp[i] = sum * v[nz]; 12093c0119dfSShri Abhyankar x[c[i]] += tmp[i]; 12109371c9d4SSatish Balay v += nz + 1; 12119371c9d4SSatish Balay vi += nz + 1; 12123c0119dfSShri Abhyankar } 12133c0119dfSShri Abhyankar 1214b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 12159566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 12169566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 12179566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12189566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12199566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 12203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 12213c0119dfSShri Abhyankar } 12223c0119dfSShri Abhyankar 1223d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1224d71ae5a4SJacob Faibussowitsch { 1225416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 12262235e667SBarry Smith IS iscol = a->col, isrow = a->row; 122704fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 122804fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 122904fbf559SBarry Smith PetscInt nz; 123004fbf559SBarry Smith PetscScalar *x, *tmp, s1; 1231b65878eeSJunchao Zhang const MatScalar *aa, *v; 123204fbf559SBarry Smith const PetscScalar *b; 1233da3a660dSBarry Smith 12343a40ed3dSBarry Smith PetscFunctionBegin; 1235b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 12369566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12379566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1238416022c9SBarry Smith tmp = a->solve_work; 1239da3a660dSBarry Smith 12409371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 12419371c9d4SSatish Balay r = rout; 12429371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 12439371c9d4SSatish Balay c = cout; 1244da3a660dSBarry Smith 1245da3a660dSBarry Smith /* copy the b into temp work space according to permutation */ 12462235e667SBarry Smith for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1247da3a660dSBarry Smith 1248da3a660dSBarry Smith /* forward solve the U^T */ 1249da3a660dSBarry Smith for (i = 0; i < n; i++) { 1250010a20caSHong Zhang v = aa + diag[i]; 1251010a20caSHong Zhang vi = aj + diag[i] + 1; 1252f1af5d2fSBarry Smith nz = ai[i + 1] - diag[i] - 1; 1253c38d4ed2SBarry Smith s1 = tmp[i]; 1254ef66eb69SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 125504fbf559SBarry Smith for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1256c38d4ed2SBarry Smith tmp[i] = s1; 1257da3a660dSBarry Smith } 1258da3a660dSBarry Smith 1259da3a660dSBarry Smith /* backward solve the L^T */ 1260da3a660dSBarry Smith for (i = n - 1; i >= 0; i--) { 1261010a20caSHong Zhang v = aa + diag[i] - 1; 1262010a20caSHong Zhang vi = aj + diag[i] - 1; 1263f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 1264f1af5d2fSBarry Smith s1 = tmp[i]; 126504fbf559SBarry Smith for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1266da3a660dSBarry Smith } 1267da3a660dSBarry Smith 1268da3a660dSBarry Smith /* copy tmp into x according to permutation */ 1269591294e4SBarry Smith for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1270da3a660dSBarry Smith 12719566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 12729566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1273b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 12749566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 1276da3a660dSBarry Smith 12779566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 12783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1279da3a660dSBarry Smith } 1280da3a660dSBarry Smith 1281d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1282d71ae5a4SJacob Faibussowitsch { 1283d1fa9404SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1284d1fa9404SShri Abhyankar IS iscol = a->col, isrow = a->row; 1285*421480d9SBarry Smith const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi; 1286d1fa9404SShri Abhyankar PetscInt i, n = A->rmap->n, j; 1287d1fa9404SShri Abhyankar PetscInt nz; 1288d1fa9404SShri Abhyankar PetscScalar *x, *tmp, s1; 1289b65878eeSJunchao Zhang const MatScalar *aa, *v; 1290d1fa9404SShri Abhyankar const PetscScalar *b; 1291d1fa9404SShri Abhyankar 1292d1fa9404SShri Abhyankar PetscFunctionBegin; 1293*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1294b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 12959566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12969566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1297d1fa9404SShri Abhyankar tmp = a->solve_work; 1298d1fa9404SShri Abhyankar 12999371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 13009371c9d4SSatish Balay r = rout; 13019371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 13029371c9d4SSatish Balay c = cout; 1303d1fa9404SShri Abhyankar 1304d1fa9404SShri Abhyankar /* copy the b into temp work space according to permutation */ 1305d1fa9404SShri Abhyankar for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1306d1fa9404SShri Abhyankar 1307d1fa9404SShri Abhyankar /* forward solve the U^T */ 1308d1fa9404SShri Abhyankar for (i = 0; i < n; i++) { 1309d1fa9404SShri Abhyankar v = aa + adiag[i + 1] + 1; 1310d1fa9404SShri Abhyankar vi = aj + adiag[i + 1] + 1; 1311d1fa9404SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 1312d1fa9404SShri Abhyankar s1 = tmp[i]; 1313d1fa9404SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1314d1fa9404SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1315d1fa9404SShri Abhyankar tmp[i] = s1; 1316d1fa9404SShri Abhyankar } 1317d1fa9404SShri Abhyankar 1318d1fa9404SShri Abhyankar /* backward solve the L^T */ 1319d1fa9404SShri Abhyankar for (i = n - 1; i >= 0; i--) { 1320d1fa9404SShri Abhyankar v = aa + ai[i]; 1321d1fa9404SShri Abhyankar vi = aj + ai[i]; 1322d1fa9404SShri Abhyankar nz = ai[i + 1] - ai[i]; 1323d1fa9404SShri Abhyankar s1 = tmp[i]; 1324d1fa9404SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1325d1fa9404SShri Abhyankar } 1326d1fa9404SShri Abhyankar 1327d1fa9404SShri Abhyankar /* copy tmp into x according to permutation */ 1328d1fa9404SShri Abhyankar for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1329d1fa9404SShri Abhyankar 13309566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 13319566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1332b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 13339566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 13349566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 1335d1fa9404SShri Abhyankar 13369566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 13373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1338d1fa9404SShri Abhyankar } 1339d1fa9404SShri Abhyankar 1340d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1341d71ae5a4SJacob Faibussowitsch { 1342416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 13432235e667SBarry Smith IS iscol = a->col, isrow = a->row; 134404fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 134504fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 134604fbf559SBarry Smith PetscInt nz; 134704fbf559SBarry Smith PetscScalar *x, *tmp, s1; 1348b65878eeSJunchao Zhang const MatScalar *aa, *v; 134904fbf559SBarry Smith const PetscScalar *b; 13506abc6512SBarry Smith 13513a40ed3dSBarry Smith PetscFunctionBegin; 13529566063dSJacob Faibussowitsch if (zz != xx) PetscCall(VecCopy(zz, xx)); 1353b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 13549566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 13559566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1356416022c9SBarry Smith tmp = a->solve_work; 13576abc6512SBarry Smith 13589371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 13599371c9d4SSatish Balay r = rout; 13609371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 13619371c9d4SSatish Balay c = cout; 13626abc6512SBarry Smith 13636abc6512SBarry Smith /* copy the b into temp work space according to permutation */ 13642235e667SBarry Smith for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 13656abc6512SBarry Smith 13666abc6512SBarry Smith /* forward solve the U^T */ 13676abc6512SBarry Smith for (i = 0; i < n; i++) { 1368010a20caSHong Zhang v = aa + diag[i]; 1369010a20caSHong Zhang vi = aj + diag[i] + 1; 1370f1af5d2fSBarry Smith nz = ai[i + 1] - diag[i] - 1; 137104fbf559SBarry Smith s1 = tmp[i]; 137204fbf559SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 137304fbf559SBarry Smith for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 137404fbf559SBarry Smith tmp[i] = s1; 13756abc6512SBarry Smith } 13766abc6512SBarry Smith 13776abc6512SBarry Smith /* backward solve the L^T */ 13786abc6512SBarry Smith for (i = n - 1; i >= 0; i--) { 1379010a20caSHong Zhang v = aa + diag[i] - 1; 1380010a20caSHong Zhang vi = aj + diag[i] - 1; 1381f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 138204fbf559SBarry Smith s1 = tmp[i]; 138304fbf559SBarry Smith for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 13846abc6512SBarry Smith } 13856abc6512SBarry Smith 13866abc6512SBarry Smith /* copy tmp into x according to permutation */ 13876abc6512SBarry Smith for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 13886abc6512SBarry Smith 13899566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 13909566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1391b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 13929566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 13939566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 13946abc6512SBarry Smith 13959566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 13963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1397da3a660dSBarry Smith } 139804fbf559SBarry Smith 1399d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1400d71ae5a4SJacob Faibussowitsch { 1401533e6011SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1402533e6011SShri Abhyankar IS iscol = a->col, isrow = a->row; 1403*421480d9SBarry Smith const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi; 1404533e6011SShri Abhyankar PetscInt i, n = A->rmap->n, j; 1405533e6011SShri Abhyankar PetscInt nz; 1406533e6011SShri Abhyankar PetscScalar *x, *tmp, s1; 1407b65878eeSJunchao Zhang const MatScalar *aa, *v; 1408533e6011SShri Abhyankar const PetscScalar *b; 1409533e6011SShri Abhyankar 1410533e6011SShri Abhyankar PetscFunctionBegin; 14119566063dSJacob Faibussowitsch if (zz != xx) PetscCall(VecCopy(zz, xx)); 1412*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1413b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 14149566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 14159566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1416533e6011SShri Abhyankar tmp = a->solve_work; 1417533e6011SShri Abhyankar 14189371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 14199371c9d4SSatish Balay r = rout; 14209371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 14219371c9d4SSatish Balay c = cout; 1422533e6011SShri Abhyankar 1423533e6011SShri Abhyankar /* copy the b into temp work space according to permutation */ 1424533e6011SShri Abhyankar for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1425533e6011SShri Abhyankar 1426533e6011SShri Abhyankar /* forward solve the U^T */ 1427533e6011SShri Abhyankar for (i = 0; i < n; i++) { 1428533e6011SShri Abhyankar v = aa + adiag[i + 1] + 1; 1429533e6011SShri Abhyankar vi = aj + adiag[i + 1] + 1; 1430533e6011SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 1431533e6011SShri Abhyankar s1 = tmp[i]; 1432533e6011SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1433533e6011SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1434533e6011SShri Abhyankar tmp[i] = s1; 1435533e6011SShri Abhyankar } 1436533e6011SShri Abhyankar 1437533e6011SShri Abhyankar /* backward solve the L^T */ 1438533e6011SShri Abhyankar for (i = n - 1; i >= 0; i--) { 1439533e6011SShri Abhyankar v = aa + ai[i]; 1440533e6011SShri Abhyankar vi = aj + ai[i]; 1441533e6011SShri Abhyankar nz = ai[i + 1] - ai[i]; 1442533e6011SShri Abhyankar s1 = tmp[i]; 1443c5e3b2a3SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1444533e6011SShri Abhyankar } 1445533e6011SShri Abhyankar 1446533e6011SShri Abhyankar /* copy tmp into x according to permutation */ 1447533e6011SShri Abhyankar for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1448533e6011SShri Abhyankar 14499566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 14509566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1451b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 14529566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 14539566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 1454533e6011SShri Abhyankar 14559566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 14563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1457533e6011SShri Abhyankar } 1458533e6011SShri Abhyankar 14598fc3a347SHong Zhang /* 14608a76ed4fSHong Zhang ilu() under revised new data structure. 1461813a1f2bSShri Abhyankar Factored arrays bj and ba are stored as 1462813a1f2bSShri Abhyankar L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1463813a1f2bSShri Abhyankar 1464813a1f2bSShri Abhyankar bi=fact->i is an array of size n+1, in which 1465baabb860SHong Zhang bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1466baabb860SHong Zhang bi[n]: points to L(n-1,n-1)+1 1467baabb860SHong Zhang 1468813a1f2bSShri Abhyankar bdiag=fact->diag is an array of size n+1,in which 1469baabb860SHong Zhang bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1470a24f213cSHong Zhang bdiag[n]: points to entry of U(n-1,0)-1 1471813a1f2bSShri Abhyankar 1472813a1f2bSShri Abhyankar U(i,:) contains bdiag[i] as its last entry, i.e., 1473813a1f2bSShri Abhyankar U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1474813a1f2bSShri Abhyankar */ 1475d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1476d71ae5a4SJacob Faibussowitsch { 1477813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1478*421480d9SBarry Smith const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag; 1479bbd65245SShri Abhyankar PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 14801df811f5SHong Zhang IS isicol; 1481813a1f2bSShri Abhyankar 1482813a1f2bSShri Abhyankar PetscFunctionBegin; 14839566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1484*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 14859566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 148657508eceSPierre Jolivet b = (Mat_SeqAIJ *)fact->data; 1487813a1f2bSShri Abhyankar 1488813a1f2bSShri Abhyankar /* allocate matrix arrays for new data structure */ 148984648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a)); 149084648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j)); 14919f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i)); 14929f0612e4SBarry Smith if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 14939f0612e4SBarry Smith b->free_a = PETSC_TRUE; 14949f0612e4SBarry Smith b->free_ij = PETSC_TRUE; 14952205254eSKarl Rupp 1496aa624791SPierre Jolivet if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 1497813a1f2bSShri Abhyankar bdiag = b->diag; 1498813a1f2bSShri Abhyankar 1499813a1f2bSShri Abhyankar /* set bi and bj with new data structure */ 1500813a1f2bSShri Abhyankar bi = b->i; 1501813a1f2bSShri Abhyankar bj = b->j; 1502813a1f2bSShri Abhyankar 1503813a1f2bSShri Abhyankar /* L part */ 1504e60cf9a0SBarry Smith bi[0] = 0; 1505813a1f2bSShri Abhyankar for (i = 0; i < n; i++) { 1506813a1f2bSShri Abhyankar nz = adiag[i] - ai[i]; 1507813a1f2bSShri Abhyankar bi[i + 1] = bi[i] + nz; 1508813a1f2bSShri Abhyankar aj = a->j + ai[i]; 1509ad540459SPierre Jolivet for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1510813a1f2bSShri Abhyankar } 1511813a1f2bSShri Abhyankar 1512813a1f2bSShri Abhyankar /* U part */ 151362fbd6afSShri Abhyankar bdiag[n] = bi[n] - 1; 1514813a1f2bSShri Abhyankar for (i = n - 1; i >= 0; i--) { 1515813a1f2bSShri Abhyankar nz = ai[i + 1] - adiag[i] - 1; 1516813a1f2bSShri Abhyankar aj = a->j + adiag[i] + 1; 1517ad540459SPierre Jolivet for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1518813a1f2bSShri Abhyankar /* diag[i] */ 1519bbd65245SShri Abhyankar bj[k++] = i; 1520a24f213cSHong Zhang bdiag[i] = bdiag[i + 1] + nz + 1; 1521813a1f2bSShri Abhyankar } 15221df811f5SHong Zhang 1523d5f3da31SBarry Smith fact->factortype = MAT_FACTOR_ILU; 15241df811f5SHong Zhang fact->info.factor_mallocs = 0; 15251df811f5SHong Zhang fact->info.fill_ratio_given = info->fill; 15261df811f5SHong Zhang fact->info.fill_ratio_needed = 1.0; 152735233d99SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 15289566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 15291df811f5SHong Zhang 153057508eceSPierre Jolivet b = (Mat_SeqAIJ *)fact->data; 15311df811f5SHong Zhang b->row = isrow; 15321df811f5SHong Zhang b->col = iscol; 15331df811f5SHong Zhang b->icol = isicol; 153484648c2dSPierre Jolivet PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work)); 15359566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 15369566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 15373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1538813a1f2bSShri Abhyankar } 1539813a1f2bSShri Abhyankar 1540d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1541d71ae5a4SJacob Faibussowitsch { 1542813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1543813a1f2bSShri Abhyankar IS isicol; 1544813a1f2bSShri Abhyankar const PetscInt *r, *ic; 15451df811f5SHong Zhang PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1546813a1f2bSShri Abhyankar PetscInt *bi, *cols, nnz, *cols_lvl; 1547813a1f2bSShri Abhyankar PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1548813a1f2bSShri Abhyankar PetscInt i, levels, diagonal_fill; 1549*421480d9SBarry Smith PetscBool col_identity, row_identity; 1550813a1f2bSShri Abhyankar PetscReal f; 15510298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1552813a1f2bSShri Abhyankar PetscBT lnkbt; 1553813a1f2bSShri Abhyankar PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 15540298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 15550298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1556*421480d9SBarry Smith PetscBool diagDense; 1557813a1f2bSShri Abhyankar 1558813a1f2bSShri Abhyankar PetscFunctionBegin; 155908401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1560*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense)); 1561*421480d9SBarry Smith PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 1562ad04f41aSHong Zhang 1563813a1f2bSShri Abhyankar levels = (PetscInt)info->levels; 15649566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 15659566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 1566813a1f2bSShri Abhyankar if (!levels && row_identity && col_identity) { 1567813a1f2bSShri Abhyankar /* special case: ilu(0) with natural ordering */ 15689566063dSJacob Faibussowitsch PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 15694d12350bSJunchao Zhang if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 15703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1571813a1f2bSShri Abhyankar } 1572813a1f2bSShri Abhyankar 15739566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 15749566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 15759566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 1576813a1f2bSShri Abhyankar 15771bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 15789f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 15799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1580e60cf9a0SBarry Smith bi[0] = bdiag[0] = 0; 15819566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1582813a1f2bSShri Abhyankar 1583813a1f2bSShri Abhyankar /* create a linked list for storing column indices of the active row */ 1584813a1f2bSShri Abhyankar nlnk = n + 1; 15859566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1586813a1f2bSShri Abhyankar 1587813a1f2bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 15881df811f5SHong Zhang f = info->fill; 15891df811f5SHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 15909566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1591813a1f2bSShri Abhyankar current_space = free_space; 15929566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1593813a1f2bSShri Abhyankar current_space_lvl = free_space_lvl; 1594813a1f2bSShri Abhyankar for (i = 0; i < n; i++) { 1595813a1f2bSShri Abhyankar nzi = 0; 1596813a1f2bSShri Abhyankar /* copy current row into linked list */ 1597813a1f2bSShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 159828b400f6SJacob Faibussowitsch PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1599813a1f2bSShri Abhyankar cols = aj + ai[r[i]]; 1600813a1f2bSShri Abhyankar lnk[i] = -1; /* marker to indicate if diagonal exists */ 16019566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1602813a1f2bSShri Abhyankar nzi += nlnk; 1603813a1f2bSShri Abhyankar 1604813a1f2bSShri Abhyankar /* make sure diagonal entry is included */ 1605813a1f2bSShri Abhyankar if (diagonal_fill && lnk[i] == -1) { 1606813a1f2bSShri Abhyankar fm = n; 1607813a1f2bSShri Abhyankar while (lnk[fm] < i) fm = lnk[fm]; 1608813a1f2bSShri Abhyankar lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1609813a1f2bSShri Abhyankar lnk[fm] = i; 1610813a1f2bSShri Abhyankar lnk_lvl[i] = 0; 16119371c9d4SSatish Balay nzi++; 16129371c9d4SSatish Balay dcount++; 1613813a1f2bSShri Abhyankar } 1614813a1f2bSShri Abhyankar 1615813a1f2bSShri Abhyankar /* add pivot rows into the active row */ 1616813a1f2bSShri Abhyankar nzbd = 0; 1617813a1f2bSShri Abhyankar prow = lnk[n]; 1618813a1f2bSShri Abhyankar while (prow < i) { 1619813a1f2bSShri Abhyankar nnz = bdiag[prow]; 1620813a1f2bSShri Abhyankar cols = bj_ptr[prow] + nnz + 1; 1621813a1f2bSShri Abhyankar cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1622813a1f2bSShri Abhyankar nnz = bi[prow + 1] - bi[prow] - nnz - 1; 16239566063dSJacob Faibussowitsch PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1624813a1f2bSShri Abhyankar nzi += nlnk; 1625813a1f2bSShri Abhyankar prow = lnk[prow]; 1626813a1f2bSShri Abhyankar nzbd++; 1627813a1f2bSShri Abhyankar } 1628813a1f2bSShri Abhyankar bdiag[i] = nzbd; 1629813a1f2bSShri Abhyankar bi[i + 1] = bi[i] + nzi; 1630813a1f2bSShri Abhyankar /* if free space is not available, make more free space */ 1631813a1f2bSShri Abhyankar if (current_space->local_remaining < nzi) { 1632f91af8c7SBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 16339566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 16349566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1635813a1f2bSShri Abhyankar reallocs++; 1636813a1f2bSShri Abhyankar } 1637813a1f2bSShri Abhyankar 1638813a1f2bSShri Abhyankar /* copy data into free_space and free_space_lvl, then initialize lnk */ 16399566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1640813a1f2bSShri Abhyankar bj_ptr[i] = current_space->array; 1641813a1f2bSShri Abhyankar bjlvl_ptr[i] = current_space_lvl->array; 1642813a1f2bSShri Abhyankar 1643813a1f2bSShri Abhyankar /* make sure the active row i has diagonal entry */ 164400045ab3SPierre Jolivet PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1645813a1f2bSShri Abhyankar 1646813a1f2bSShri Abhyankar current_space->array += nzi; 1647813a1f2bSShri Abhyankar current_space->local_used += nzi; 1648813a1f2bSShri Abhyankar current_space->local_remaining -= nzi; 1649813a1f2bSShri Abhyankar current_space_lvl->array += nzi; 1650813a1f2bSShri Abhyankar current_space_lvl->local_used += nzi; 1651813a1f2bSShri Abhyankar current_space_lvl->local_remaining -= nzi; 1652813a1f2bSShri Abhyankar } 1653813a1f2bSShri Abhyankar 16549566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 16559566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 1656813a1f2bSShri Abhyankar /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 165784648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj)); 16589566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1659813a1f2bSShri Abhyankar 16609566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 16619566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 16629566063dSJacob Faibussowitsch PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1663813a1f2bSShri Abhyankar 1664813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO) 1665813a1f2bSShri Abhyankar { 1666aef85c9fSShri Abhyankar PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 16679566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 16689566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 16699566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 16709566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 167148a46eb9SPierre Jolivet if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1672813a1f2bSShri Abhyankar } 1673813a1f2bSShri Abhyankar #endif 1674813a1f2bSShri Abhyankar /* put together the new matrix */ 16759566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 167657508eceSPierre Jolivet b = (Mat_SeqAIJ *)fact->data; 1677813a1f2bSShri Abhyankar b->free_ij = PETSC_TRUE; 16789f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 16799f0612e4SBarry Smith b->free_a = PETSC_TRUE; 1680813a1f2bSShri Abhyankar b->j = bj; 1681813a1f2bSShri Abhyankar b->i = bi; 1682813a1f2bSShri Abhyankar b->diag = bdiag; 1683f4259b30SLisandro Dalcin b->ilen = NULL; 1684f4259b30SLisandro Dalcin b->imax = NULL; 1685813a1f2bSShri Abhyankar b->row = isrow; 1686813a1f2bSShri Abhyankar b->col = iscol; 16879566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 16889566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 1689813a1f2bSShri Abhyankar b->icol = isicol; 16902205254eSKarl Rupp 169184648c2dSPierre Jolivet PetscCall(PetscMalloc1(n, &b->solve_work)); 1692813a1f2bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. 1693813a1f2bSShri Abhyankar Allocate bdiag, solve_work, new a, new j */ 1694f268cba8SShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 16952205254eSKarl Rupp 169657508eceSPierre Jolivet fact->info.factor_mallocs = reallocs; 169757508eceSPierre Jolivet fact->info.fill_ratio_given = f; 169857508eceSPierre Jolivet fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 169957508eceSPierre Jolivet fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 17004d12350bSJunchao Zhang if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 17019566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 17023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1703813a1f2bSShri Abhyankar } 1704813a1f2bSShri Abhyankar 1705d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 1706d71ae5a4SJacob Faibussowitsch { 17075f44c854SHong Zhang Mat C = B; 17085f44c854SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 17095f44c854SHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 17105f44c854SHong Zhang IS ip = b->row, iip = b->icol; 17115f44c854SHong Zhang const PetscInt *rip, *riip; 17125f44c854SHong Zhang PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 17135f44c854SHong Zhang PetscInt *ai = a->i, *aj = a->j; 17145f44c854SHong Zhang PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 1715b65878eeSJunchao Zhang MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi; 1716ace3abfcSBarry Smith PetscBool perm_identity; 1717d90ac83dSHong Zhang FactorShiftCtx sctx; 171898a93161SHong Zhang PetscReal rs; 1719b65878eeSJunchao Zhang const MatScalar *aa, *v; 1720b65878eeSJunchao Zhang MatScalar d; 1721*421480d9SBarry Smith const PetscInt *adiag; 172298a93161SHong Zhang 17235f44c854SHong Zhang PetscFunctionBegin; 1724*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1725b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 172698a93161SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 17279566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 172898a93161SHong Zhang 1729f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 173098a93161SHong Zhang sctx.shift_top = info->zeropivot; 173198a93161SHong Zhang for (i = 0; i < mbs; i++) { 173298a93161SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1733*421480d9SBarry Smith d = aa[adiag[i]]; 173498a93161SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 173598a93161SHong Zhang v = aa + ai[i]; 173698a93161SHong Zhang nz = ai[i + 1] - ai[i]; 17372205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 173898a93161SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 173998a93161SHong Zhang } 174098a93161SHong Zhang sctx.shift_top *= 1.1; 174198a93161SHong Zhang sctx.nshift_max = 5; 174298a93161SHong Zhang sctx.shift_lo = 0.; 174398a93161SHong Zhang sctx.shift_hi = 1.; 174498a93161SHong Zhang } 17455f44c854SHong Zhang 17469566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 17479566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iip, &riip)); 17485f44c854SHong Zhang 17495f44c854SHong Zhang /* allocate working arrays 17505f44c854SHong Zhang c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 17515f44c854SHong Zhang il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 17525f44c854SHong Zhang */ 17539566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 17545f44c854SHong Zhang 175598a93161SHong Zhang do { 175607b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 175798a93161SHong Zhang 1758c5546cabSHong Zhang for (i = 0; i < mbs; i++) c2r[i] = mbs; 17592e987568SSatish Balay if (mbs) il[0] = 0; 17605f44c854SHong Zhang 17615f44c854SHong Zhang for (k = 0; k < mbs; k++) { 17625f44c854SHong Zhang /* zero rtmp */ 17635f44c854SHong Zhang nz = bi[k + 1] - bi[k]; 17645f44c854SHong Zhang bjtmp = bj + bi[k]; 17655f44c854SHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 17665f44c854SHong Zhang 17675f44c854SHong Zhang /* load in initial unfactored row */ 17685f44c854SHong Zhang bval = ba + bi[k]; 17699371c9d4SSatish Balay jmin = ai[rip[k]]; 17709371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 17715f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 17725f44c854SHong Zhang col = riip[aj[j]]; 17735f44c854SHong Zhang if (col >= k) { /* only take upper triangular entry */ 17745f44c854SHong Zhang rtmp[col] = aa[j]; 17755f44c854SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 17765f44c854SHong Zhang } 17775f44c854SHong Zhang } 177898a93161SHong Zhang /* shift the diagonal of the matrix: ZeropivotApply() */ 177998a93161SHong Zhang rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 17805f44c854SHong Zhang 17815f44c854SHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 17825f44c854SHong Zhang dk = rtmp[k]; 17835f44c854SHong Zhang i = c2r[k]; /* first row to be added to k_th row */ 17845f44c854SHong Zhang 17855f44c854SHong Zhang while (i < k) { 17865f44c854SHong Zhang nexti = c2r[i]; /* next row to be added to k_th row */ 17875f44c854SHong Zhang 17885f44c854SHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 17895f44c854SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 17905f44c854SHong Zhang uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 17915f44c854SHong Zhang dk += uikdi * ba[ili]; /* update diag[k] */ 17925f44c854SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 17935f44c854SHong Zhang 17945f44c854SHong Zhang /* add multiple of row i to k-th row */ 17959371c9d4SSatish Balay jmin = ili + 1; 17969371c9d4SSatish Balay jmax = bi[i + 1]; 17975f44c854SHong Zhang if (jmin < jmax) { 17985f44c854SHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 17995f44c854SHong Zhang /* update il and c2r for row i */ 18005f44c854SHong Zhang il[i] = jmin; 18019371c9d4SSatish Balay j = bj[jmin]; 18029371c9d4SSatish Balay c2r[i] = c2r[j]; 18039371c9d4SSatish Balay c2r[j] = i; 18045f44c854SHong Zhang } 18055f44c854SHong Zhang i = nexti; 18065f44c854SHong Zhang } 18075f44c854SHong Zhang 18085f44c854SHong Zhang /* copy data into U(k,:) */ 180998a93161SHong Zhang rs = 0.0; 18109371c9d4SSatish Balay jmin = bi[k]; 18119371c9d4SSatish Balay jmax = bi[k + 1] - 1; 18125f44c854SHong Zhang if (jmin < jmax) { 18135f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 18149371c9d4SSatish Balay col = bj[j]; 18159371c9d4SSatish Balay ba[j] = rtmp[col]; 18169371c9d4SSatish Balay rs += PetscAbsScalar(ba[j]); 18175f44c854SHong Zhang } 18185f44c854SHong Zhang /* add the k-th row into il and c2r */ 18195f44c854SHong Zhang il[k] = jmin; 18209371c9d4SSatish Balay i = bj[jmin]; 18219371c9d4SSatish Balay c2r[k] = c2r[i]; 18229371c9d4SSatish Balay c2r[i] = k; 18235f44c854SHong Zhang } 182498a93161SHong Zhang 182598a93161SHong Zhang /* MatPivotCheck() */ 182698a93161SHong Zhang sctx.rs = rs; 182798a93161SHong Zhang sctx.pv = dk; 18289566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 182907b50cabSHong Zhang if (sctx.newshift) break; 183098a93161SHong Zhang dk = sctx.pv; 183198a93161SHong Zhang 183298a93161SHong Zhang ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 183398a93161SHong Zhang } 183407b50cabSHong Zhang } while (sctx.newshift); 18355f44c854SHong Zhang 1836b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 18379566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, c2r)); 18389566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 18399566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iip, &riip)); 18405f44c854SHong Zhang 18419566063dSJacob Faibussowitsch PetscCall(ISIdentity(ip, &perm_identity)); 18425f44c854SHong Zhang if (perm_identity) { 184335233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 184435233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 18452169352eSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 18462169352eSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 18475f44c854SHong Zhang } else { 184835233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1; 184935233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 185035233d99SShri Abhyankar B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 185135233d99SShri Abhyankar B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 18525f44c854SHong Zhang } 18535f44c854SHong Zhang 18545f44c854SHong Zhang C->assembled = PETSC_TRUE; 18555f44c854SHong Zhang C->preallocated = PETSC_TRUE; 18562205254eSKarl Rupp 18579566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->n)); 185898a93161SHong Zhang 185998a93161SHong Zhang /* MatPivotView() */ 186098a93161SHong Zhang if (sctx.nshift) { 1861f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 18629566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 1863f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 18649566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1865f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 18669566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 186798a93161SHong Zhang } 186898a93161SHong Zhang } 18693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18705f44c854SHong Zhang } 18715f44c854SHong Zhang 1872d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 1873d71ae5a4SJacob Faibussowitsch { 1874719d5645SBarry Smith Mat C = B; 18750968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 18762ed133dbSHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 18779bfd6278SHong Zhang IS ip = b->row, iip = b->icol; 18785d0c19d7SBarry Smith const PetscInt *rip, *riip; 1879c5546cabSHong Zhang PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 18802ed133dbSHong Zhang PetscInt *ai = a->i, *aj = a->j; 18812ed133dbSHong Zhang PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 1882b65878eeSJunchao Zhang MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi; 1883b65878eeSJunchao Zhang const MatScalar *aa, *v; 1884ace3abfcSBarry Smith PetscBool perm_identity; 18850e95ead3SHong Zhang FactorShiftCtx sctx; 18860e95ead3SHong Zhang PetscReal rs; 1887b65878eeSJunchao Zhang MatScalar d; 1888*421480d9SBarry Smith const PetscInt *adiag; 1889d5d45c9bSBarry Smith 1890a6175056SHong Zhang PetscFunctionBegin; 1891*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1892b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 18930e95ead3SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 18949566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 18950e95ead3SHong Zhang 18960e95ead3SHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 18970e95ead3SHong Zhang sctx.shift_top = info->zeropivot; 18980e95ead3SHong Zhang for (i = 0; i < mbs; i++) { 18990e95ead3SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1900*421480d9SBarry Smith d = aa[adiag[i]]; 19010e95ead3SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 19020e95ead3SHong Zhang v = aa + ai[i]; 19030e95ead3SHong Zhang nz = ai[i + 1] - ai[i]; 19042205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 19050e95ead3SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 19060e95ead3SHong Zhang } 19070e95ead3SHong Zhang sctx.shift_top *= 1.1; 19080e95ead3SHong Zhang sctx.nshift_max = 5; 19090e95ead3SHong Zhang sctx.shift_lo = 0.; 19100e95ead3SHong Zhang sctx.shift_hi = 1.; 19110e95ead3SHong Zhang } 1912ee45ca4aSHong Zhang 19139566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 19149566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iip, &riip)); 19152ed133dbSHong Zhang 19162ed133dbSHong Zhang /* initialization */ 19179566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 19180e95ead3SHong Zhang 19192ed133dbSHong Zhang do { 192007b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 19210e95ead3SHong Zhang 1922c5546cabSHong Zhang for (i = 0; i < mbs; i++) jl[i] = mbs; 1923c5546cabSHong Zhang il[0] = 0; 19242ed133dbSHong Zhang 19252ed133dbSHong Zhang for (k = 0; k < mbs; k++) { 1926c5546cabSHong Zhang /* zero rtmp */ 1927c5546cabSHong Zhang nz = bi[k + 1] - bi[k]; 1928c5546cabSHong Zhang bjtmp = bj + bi[k]; 1929c5546cabSHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1930c5546cabSHong Zhang 1931c05c3958SHong Zhang bval = ba + bi[k]; 19322ed133dbSHong Zhang /* initialize k-th row by the perm[k]-th row of A */ 19339371c9d4SSatish Balay jmin = ai[rip[k]]; 19349371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 19352ed133dbSHong Zhang for (j = jmin; j < jmax; j++) { 19369bfd6278SHong Zhang col = riip[aj[j]]; 19372ed133dbSHong Zhang if (col >= k) { /* only take upper triangular entry */ 19382ed133dbSHong Zhang rtmp[col] = aa[j]; 1939c05c3958SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 19402ed133dbSHong Zhang } 19412ed133dbSHong Zhang } 194239e3d630SHong Zhang /* shift the diagonal of the matrix */ 1943540703acSHong Zhang if (sctx.nshift) rtmp[k] += sctx.shift_amount; 19442ed133dbSHong Zhang 19452ed133dbSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 19462ed133dbSHong Zhang dk = rtmp[k]; 19472ed133dbSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 19482ed133dbSHong Zhang 19492ed133dbSHong Zhang while (i < k) { 19502ed133dbSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 19512ed133dbSHong Zhang 19522ed133dbSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 19532ed133dbSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 19542ed133dbSHong Zhang uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 19552ed133dbSHong Zhang dk += uikdi * ba[ili]; 19562ed133dbSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 19572ed133dbSHong Zhang 19582ed133dbSHong Zhang /* add multiple of row i to k-th row */ 19599371c9d4SSatish Balay jmin = ili + 1; 19609371c9d4SSatish Balay jmax = bi[i + 1]; 19612ed133dbSHong Zhang if (jmin < jmax) { 19622ed133dbSHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 19632ed133dbSHong Zhang /* update il and jl for row i */ 19642ed133dbSHong Zhang il[i] = jmin; 19659371c9d4SSatish Balay j = bj[jmin]; 19669371c9d4SSatish Balay jl[i] = jl[j]; 19679371c9d4SSatish Balay jl[j] = i; 19682ed133dbSHong Zhang } 19692ed133dbSHong Zhang i = nexti; 19702ed133dbSHong Zhang } 19712ed133dbSHong Zhang 19720697b6caSHong Zhang /* shift the diagonals when zero pivot is detected */ 19730697b6caSHong Zhang /* compute rs=sum of abs(off-diagonal) */ 19740697b6caSHong Zhang rs = 0.0; 19753c9e8598SHong Zhang jmin = bi[k] + 1; 19763c9e8598SHong Zhang nz = bi[k + 1] - jmin; 19773c9e8598SHong Zhang bcol = bj + jmin; 1978ad540459SPierre Jolivet for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 1979540703acSHong Zhang 1980540703acSHong Zhang sctx.rs = rs; 1981540703acSHong Zhang sctx.pv = dk; 19829566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 198307b50cabSHong Zhang if (sctx.newshift) break; 19840e95ead3SHong Zhang dk = sctx.pv; 19853c9e8598SHong Zhang 19863c9e8598SHong Zhang /* copy data into U(k,:) */ 198739e3d630SHong Zhang ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 19889371c9d4SSatish Balay jmin = bi[k] + 1; 19899371c9d4SSatish Balay jmax = bi[k + 1]; 199039e3d630SHong Zhang if (jmin < jmax) { 199139e3d630SHong Zhang for (j = jmin; j < jmax; j++) { 19929371c9d4SSatish Balay col = bj[j]; 19939371c9d4SSatish Balay ba[j] = rtmp[col]; 19943c9e8598SHong Zhang } 199539e3d630SHong Zhang /* add the k-th row into il and jl */ 19963c9e8598SHong Zhang il[k] = jmin; 19979371c9d4SSatish Balay i = bj[jmin]; 19989371c9d4SSatish Balay jl[k] = jl[i]; 19999371c9d4SSatish Balay jl[i] = k; 20003c9e8598SHong Zhang } 20013c9e8598SHong Zhang } 200207b50cabSHong Zhang } while (sctx.newshift); 20030e95ead3SHong Zhang 20049566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, jl)); 20059566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 20069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iip, &riip)); 2007b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 2008db4efbfdSBarry Smith 20099566063dSJacob Faibussowitsch PetscCall(ISIdentity(ip, &perm_identity)); 2010db4efbfdSBarry Smith if (perm_identity) { 20110a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 20120a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 20130a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 20140a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2015db4efbfdSBarry Smith } else { 20160a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 20170a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 20180a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 20190a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2020db4efbfdSBarry Smith } 2021db4efbfdSBarry Smith 20223c9e8598SHong Zhang C->assembled = PETSC_TRUE; 20233c9e8598SHong Zhang C->preallocated = PETSC_TRUE; 20242205254eSKarl Rupp 20259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->n)); 2026540703acSHong Zhang if (sctx.nshift) { 2027f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 20289566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2029f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 20309566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 20310697b6caSHong Zhang } 20323c9e8598SHong Zhang } 20333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20343c9e8598SHong Zhang } 2035a6175056SHong Zhang 20368a76ed4fSHong Zhang /* 20378a76ed4fSHong Zhang icc() under revised new data structure. 20388a76ed4fSHong Zhang Factored arrays bj and ba are stored as 20398a76ed4fSHong Zhang U(0,:),...,U(i,:),U(n-1,:) 20408a76ed4fSHong Zhang 20418a76ed4fSHong Zhang ui=fact->i is an array of size n+1, in which 20428a76ed4fSHong Zhang ui+ 20438a76ed4fSHong Zhang ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 20448a76ed4fSHong Zhang ui[n]: points to U(n-1,n-1)+1 20458a76ed4fSHong Zhang 20468a76ed4fSHong Zhang udiag=fact->diag is an array of size n,in which 20478a76ed4fSHong Zhang udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 20488a76ed4fSHong Zhang 20498a76ed4fSHong Zhang U(i,:) contains udiag[i] as its last entry, i.e., 20508a76ed4fSHong Zhang U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 20518a76ed4fSHong Zhang */ 20528a76ed4fSHong Zhang 2053d71ae5a4SJacob Faibussowitsch PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2054d71ae5a4SJacob Faibussowitsch { 20550968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2056ed59401aSHong Zhang Mat_SeqSBAIJ *b; 2057*421480d9SBarry Smith PetscBool perm_identity; 20585f44c854SHong Zhang PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2059*421480d9SBarry Smith const PetscInt *rip, *riip, *adiag; 2060ed59401aSHong Zhang PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2061*421480d9SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL; 20625a8e39fbSHong Zhang PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2063ed59401aSHong Zhang PetscReal fill = info->fill, levels = info->levels; 20640298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 20650298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 20667a48dd6fSHong Zhang PetscBT lnkbt; 2067b635d36bSHong Zhang IS iperm; 2068*421480d9SBarry Smith PetscBool diagDense; 2069a6175056SHong Zhang 2070a6175056SHong Zhang PetscFunctionBegin; 207108401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2072*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense)); 2073*421480d9SBarry Smith PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 20749566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 20759566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 20767a48dd6fSHong Zhang 20779f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 20789566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 207939e3d630SHong Zhang ui[0] = 0; 208039e3d630SHong Zhang 20818a76ed4fSHong Zhang /* ICC(0) without matrix ordering: simply rearrange column indices */ 2082622e2028SHong Zhang if (!levels && perm_identity) { 20835f44c854SHong Zhang for (i = 0; i < am; i++) { 2084*421480d9SBarry Smith ncols = ai[i + 1] - adiag[i]; 2085c5546cabSHong Zhang ui[i + 1] = ui[i] + ncols; 2086c5546cabSHong Zhang udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2087dc1e2a3fSHong Zhang } 20889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2089dc1e2a3fSHong Zhang cols = uj; 2090dc1e2a3fSHong Zhang for (i = 0; i < am; i++) { 2091*421480d9SBarry Smith aj = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2092*421480d9SBarry Smith ncols = ai[i + 1] - adiag[i] - 1; 20935f44c854SHong Zhang for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2094910cf402Sprj- *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 20955f44c854SHong Zhang } 20965f44c854SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 20979566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 20989566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 20995f44c854SHong Zhang 21005f44c854SHong Zhang /* initialization */ 21019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ajtmp)); 21025f44c854SHong Zhang 21035f44c854SHong Zhang /* jl: linked list for storing indices of the pivot rows 21045f44c854SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 21059566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 21065f44c854SHong Zhang for (i = 0; i < am; i++) { 21079371c9d4SSatish Balay jl[i] = am; 21089371c9d4SSatish Balay il[i] = 0; 21095f44c854SHong Zhang } 21105f44c854SHong Zhang 21115f44c854SHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 21125f44c854SHong Zhang nlnk = am + 1; 21139566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 21145f44c854SHong Zhang 211595b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 21169566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 21175f44c854SHong Zhang current_space = free_space; 21189566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 21195f44c854SHong Zhang current_space_lvl = free_space_lvl; 21205f44c854SHong Zhang 21215f44c854SHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 21225f44c854SHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 21235f44c854SHong Zhang nzk = 0; 21245f44c854SHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 212528b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 21265f44c854SHong Zhang ncols_upper = 0; 21275f44c854SHong Zhang for (j = 0; j < ncols; j++) { 21285f44c854SHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 21295f44c854SHong Zhang if (riip[i] >= k) { /* only take upper triangular entry */ 21305f44c854SHong Zhang ajtmp[ncols_upper] = i; 21315f44c854SHong Zhang ncols_upper++; 21325f44c854SHong Zhang } 21335f44c854SHong Zhang } 21349566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 21355f44c854SHong Zhang nzk += nlnk; 21365f44c854SHong Zhang 21375f44c854SHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 21385f44c854SHong Zhang prow = jl[k]; /* 1st pivot row */ 21395f44c854SHong Zhang 21405f44c854SHong Zhang while (prow < k) { 21415f44c854SHong Zhang nextprow = jl[prow]; 21425f44c854SHong Zhang 21435f44c854SHong Zhang /* merge prow into k-th row */ 21445f44c854SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 21455f44c854SHong Zhang jmax = ui[prow + 1]; 21465f44c854SHong Zhang ncols = jmax - jmin; 21475f44c854SHong Zhang i = jmin - ui[prow]; 21485f44c854SHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 21495f44c854SHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 21505f44c854SHong Zhang j = *(uj - 1); 21519566063dSJacob Faibussowitsch PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 21525f44c854SHong Zhang nzk += nlnk; 21535f44c854SHong Zhang 21545f44c854SHong Zhang /* update il and jl for prow */ 21555f44c854SHong Zhang if (jmin < jmax) { 21565f44c854SHong Zhang il[prow] = jmin; 21579371c9d4SSatish Balay j = *cols; 21589371c9d4SSatish Balay jl[prow] = jl[j]; 21599371c9d4SSatish Balay jl[j] = prow; 21605f44c854SHong Zhang } 21615f44c854SHong Zhang prow = nextprow; 21625f44c854SHong Zhang } 21635f44c854SHong Zhang 21645f44c854SHong Zhang /* if free space is not available, make more free space */ 21655f44c854SHong Zhang if (current_space->local_remaining < nzk) { 21665f44c854SHong Zhang i = am - k + 1; /* num of unfactored rows */ 2167f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 21689566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 21699566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 21705f44c854SHong Zhang reallocs++; 21715f44c854SHong Zhang } 21725f44c854SHong Zhang 21735f44c854SHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 217408401ef6SPierre Jolivet PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 21759566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 21765f44c854SHong Zhang 21775f44c854SHong Zhang /* add the k-th row into il and jl */ 21785f44c854SHong Zhang if (nzk > 1) { 21795f44c854SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 21809371c9d4SSatish Balay jl[k] = jl[i]; 21819371c9d4SSatish Balay jl[i] = k; 21825f44c854SHong Zhang il[k] = ui[k] + 1; 21835f44c854SHong Zhang } 21845f44c854SHong Zhang uj_ptr[k] = current_space->array; 21855f44c854SHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 21865f44c854SHong Zhang 21875f44c854SHong Zhang current_space->array += nzk; 21885f44c854SHong Zhang current_space->local_used += nzk; 21895f44c854SHong Zhang current_space->local_remaining -= nzk; 21905f44c854SHong Zhang 21915f44c854SHong Zhang current_space_lvl->array += nzk; 21925f44c854SHong Zhang current_space_lvl->local_used += nzk; 21935f44c854SHong Zhang current_space_lvl->local_remaining -= nzk; 21945f44c854SHong Zhang 21955f44c854SHong Zhang ui[k + 1] = ui[k] + nzk; 21965f44c854SHong Zhang } 21975f44c854SHong Zhang 21989566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 21999566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 22009566063dSJacob Faibussowitsch PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 22019566063dSJacob Faibussowitsch PetscCall(PetscFree(ajtmp)); 22025f44c854SHong Zhang 22039263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 22049f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj)); 22059566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 22069566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 22079566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 22085f44c854SHong Zhang 22095f44c854SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 22105f44c854SHong Zhang 22115f44c854SHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 221257508eceSPierre Jolivet b = (Mat_SeqSBAIJ *)fact->data; 22139f0612e4SBarry Smith b->free_ij = PETSC_TRUE; 221484648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a)); 22159f0612e4SBarry Smith b->free_a = PETSC_TRUE; 22165f44c854SHong Zhang b->j = uj; 22175f44c854SHong Zhang b->i = ui; 22185f44c854SHong Zhang b->diag = udiag; 2219f4259b30SLisandro Dalcin b->ilen = NULL; 2220f4259b30SLisandro Dalcin b->imax = NULL; 22215f44c854SHong Zhang b->row = perm; 22225f44c854SHong Zhang b->col = perm; 22239566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 22249566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 22255f44c854SHong Zhang b->icol = iperm; 22265f44c854SHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 22272205254eSKarl Rupp 222884648c2dSPierre Jolivet PetscCall(PetscMalloc1(am, &b->solve_work)); 22292205254eSKarl Rupp 22305f44c854SHong Zhang b->maxnz = b->nz = ui[am]; 22315f44c854SHong Zhang 2232f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2233f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 22345f44c854SHong Zhang if (ai[am] != 0) { 22356a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 223695b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 22375f44c854SHong Zhang } else { 2238f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 22395f44c854SHong Zhang } 22409263d837SHong Zhang #if defined(PETSC_USE_INFO) 22419263d837SHong Zhang if (ai[am] != 0) { 22429263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 22439566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 22449566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 22459566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 22469263d837SHong Zhang } else { 22479566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 22489263d837SHong Zhang } 22499263d837SHong Zhang #endif 225035233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 22513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 22525f44c854SHong Zhang } 22535f44c854SHong Zhang 2254d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2255d71ae5a4SJacob Faibussowitsch { 22560be760fbSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 22570be760fbSHong Zhang Mat_SeqSBAIJ *b; 2258*421480d9SBarry Smith PetscBool perm_identity; 22590be760fbSHong Zhang PetscReal fill = info->fill; 22600be760fbSHong Zhang const PetscInt *rip, *riip; 22610be760fbSHong Zhang PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 22620be760fbSHong Zhang PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 22630be760fbSHong Zhang PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 22640298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 22650be760fbSHong Zhang PetscBT lnkbt; 22660be760fbSHong Zhang IS iperm; 2267*421480d9SBarry Smith PetscBool diagDense; 22680be760fbSHong Zhang 22690be760fbSHong Zhang PetscFunctionBegin; 227008401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2271*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense)); 2272*421480d9SBarry Smith PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 22739186b105SHong Zhang 22740be760fbSHong Zhang /* check whether perm is the identity mapping */ 22759566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 22769566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 22779566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 22789566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 22790be760fbSHong Zhang 22800be760fbSHong Zhang /* initialization */ 22819f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 22829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 22830be760fbSHong Zhang ui[0] = 0; 22840be760fbSHong Zhang 22850be760fbSHong Zhang /* jl: linked list for storing indices of the pivot rows 22860be760fbSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 22879566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 22880be760fbSHong Zhang for (i = 0; i < am; i++) { 22899371c9d4SSatish Balay jl[i] = am; 22909371c9d4SSatish Balay il[i] = 0; 22910be760fbSHong Zhang } 22920be760fbSHong Zhang 22930be760fbSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 22940be760fbSHong Zhang nlnk = am + 1; 22959566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 22960be760fbSHong Zhang 229795b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 22989566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 22990be760fbSHong Zhang current_space = free_space; 23000be760fbSHong Zhang 23010be760fbSHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 23020be760fbSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 23030be760fbSHong Zhang nzk = 0; 23040be760fbSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 230528b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 23060be760fbSHong Zhang ncols_upper = 0; 23070be760fbSHong Zhang for (j = 0; j < ncols; j++) { 23080be760fbSHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 23090be760fbSHong Zhang if (i >= k) { /* only take upper triangular entry */ 23100be760fbSHong Zhang cols[ncols_upper] = i; 23110be760fbSHong Zhang ncols_upper++; 23120be760fbSHong Zhang } 23130be760fbSHong Zhang } 23149566063dSJacob Faibussowitsch PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 23150be760fbSHong Zhang nzk += nlnk; 23160be760fbSHong Zhang 23170be760fbSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 23180be760fbSHong Zhang prow = jl[k]; /* 1st pivot row */ 23190be760fbSHong Zhang 23200be760fbSHong Zhang while (prow < k) { 23210be760fbSHong Zhang nextprow = jl[prow]; 23220be760fbSHong Zhang /* merge prow into k-th row */ 23230be760fbSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 23240be760fbSHong Zhang jmax = ui[prow + 1]; 23250be760fbSHong Zhang ncols = jmax - jmin; 23260be760fbSHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 23279566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 23280be760fbSHong Zhang nzk += nlnk; 23290be760fbSHong Zhang 23300be760fbSHong Zhang /* update il and jl for prow */ 23310be760fbSHong Zhang if (jmin < jmax) { 23320be760fbSHong Zhang il[prow] = jmin; 23332205254eSKarl Rupp j = *uj_ptr; 23342205254eSKarl Rupp jl[prow] = jl[j]; 23352205254eSKarl Rupp jl[j] = prow; 23360be760fbSHong Zhang } 23370be760fbSHong Zhang prow = nextprow; 23380be760fbSHong Zhang } 23390be760fbSHong Zhang 23400be760fbSHong Zhang /* if free space is not available, make more free space */ 23410be760fbSHong Zhang if (current_space->local_remaining < nzk) { 23420be760fbSHong Zhang i = am - k + 1; /* num of unfactored rows */ 2343f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 23449566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 23450be760fbSHong Zhang reallocs++; 23460be760fbSHong Zhang } 23470be760fbSHong Zhang 23480be760fbSHong Zhang /* copy data into free space, then initialize lnk */ 23499566063dSJacob Faibussowitsch PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 23500be760fbSHong Zhang 23510be760fbSHong Zhang /* add the k-th row into il and jl */ 23527b056e98SHong Zhang if (nzk > 1) { 23530be760fbSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 23549371c9d4SSatish Balay jl[k] = jl[i]; 23559371c9d4SSatish Balay jl[i] = k; 23560be760fbSHong Zhang il[k] = ui[k] + 1; 23570be760fbSHong Zhang } 23580be760fbSHong Zhang ui_ptr[k] = current_space->array; 23592205254eSKarl Rupp 23600be760fbSHong Zhang current_space->array += nzk; 23610be760fbSHong Zhang current_space->local_used += nzk; 23620be760fbSHong Zhang current_space->local_remaining -= nzk; 23630be760fbSHong Zhang 23640be760fbSHong Zhang ui[k + 1] = ui[k] + nzk; 23650be760fbSHong Zhang } 23660be760fbSHong Zhang 23679566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 23689566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 23699566063dSJacob Faibussowitsch PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 23700be760fbSHong Zhang 23719263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 237284648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj)); 23739566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 23749566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 23750be760fbSHong Zhang 23760be760fbSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 2377f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 23780be760fbSHong Zhang b->free_ij = PETSC_TRUE; 237984648c2dSPierre Jolivet PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a)); 23809f0612e4SBarry Smith b->free_a = PETSC_TRUE; 23810be760fbSHong Zhang b->j = uj; 23820be760fbSHong Zhang b->i = ui; 23830be760fbSHong Zhang b->diag = udiag; 2384f4259b30SLisandro Dalcin b->ilen = NULL; 2385f4259b30SLisandro Dalcin b->imax = NULL; 23860be760fbSHong Zhang b->row = perm; 23870be760fbSHong Zhang b->col = perm; 238826fbe8dcSKarl Rupp 23899566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 23909566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 23912205254eSKarl Rupp 23920be760fbSHong Zhang b->icol = iperm; 23930be760fbSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 239426fbe8dcSKarl Rupp 239584648c2dSPierre Jolivet PetscCall(PetscMalloc1(am, &b->solve_work)); 23962205254eSKarl Rupp 23970be760fbSHong Zhang b->maxnz = b->nz = ui[am]; 23980be760fbSHong Zhang 2399f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2400f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 24010be760fbSHong Zhang if (ai[am] != 0) { 24026a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 240395b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 24040be760fbSHong Zhang } else { 2405f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 24060be760fbSHong Zhang } 24079263d837SHong Zhang #if defined(PETSC_USE_INFO) 24089263d837SHong Zhang if (ai[am] != 0) { 24099263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 24109566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 24119566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 24129566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 24139263d837SHong Zhang } else { 24149566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 24159263d837SHong Zhang } 24169263d837SHong Zhang #endif 241735233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 24183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24190be760fbSHong Zhang } 24200be760fbSHong Zhang 2421d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 2422d71ae5a4SJacob Faibussowitsch { 2423813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2424813a1f2bSShri Abhyankar PetscInt n = A->rmap->n; 2425813a1f2bSShri Abhyankar const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 2426813a1f2bSShri Abhyankar PetscScalar *x, sum; 2427813a1f2bSShri Abhyankar const PetscScalar *b; 2428b65878eeSJunchao Zhang const MatScalar *aa, *v; 2429813a1f2bSShri Abhyankar PetscInt i, nz; 2430813a1f2bSShri Abhyankar 2431813a1f2bSShri Abhyankar PetscFunctionBegin; 24323ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 2433813a1f2bSShri Abhyankar 2434b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 24359566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 24369566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 2437813a1f2bSShri Abhyankar 2438813a1f2bSShri Abhyankar /* forward solve the lower triangular */ 2439813a1f2bSShri Abhyankar x[0] = b[0]; 2440813a1f2bSShri Abhyankar v = aa; 2441813a1f2bSShri Abhyankar vi = aj; 2442813a1f2bSShri Abhyankar for (i = 1; i < n; i++) { 2443813a1f2bSShri Abhyankar nz = ai[i + 1] - ai[i]; 2444813a1f2bSShri Abhyankar sum = b[i]; 2445813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum, x, v, vi, nz); 2446813a1f2bSShri Abhyankar v += nz; 2447813a1f2bSShri Abhyankar vi += nz; 2448813a1f2bSShri Abhyankar x[i] = sum; 2449813a1f2bSShri Abhyankar } 2450813a1f2bSShri Abhyankar 2451813a1f2bSShri Abhyankar /* backward solve the upper triangular */ 245262fbd6afSShri Abhyankar for (i = n - 1; i >= 0; i--) { 24533c0119dfSShri Abhyankar v = aa + adiag[i + 1] + 1; 24543c0119dfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 2455813a1f2bSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 2456813a1f2bSShri Abhyankar sum = x[i]; 2457813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum, x, v, vi, nz); 24583c0119dfSShri Abhyankar x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 2459813a1f2bSShri Abhyankar } 2460813a1f2bSShri Abhyankar 24619566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 2462b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 24639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 24649566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 24653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2466813a1f2bSShri Abhyankar } 2467813a1f2bSShri Abhyankar 2468d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 2469d71ae5a4SJacob Faibussowitsch { 2470f268cba8SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2471f268cba8SShri Abhyankar IS iscol = a->col, isrow = a->row; 2472f268cba8SShri Abhyankar PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 2473f268cba8SShri Abhyankar const PetscInt *rout, *cout, *r, *c; 2474f268cba8SShri Abhyankar PetscScalar *x, *tmp, sum; 2475f268cba8SShri Abhyankar const PetscScalar *b; 2476b65878eeSJunchao Zhang const MatScalar *aa, *v; 2477f268cba8SShri Abhyankar 2478f268cba8SShri Abhyankar PetscFunctionBegin; 24793ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 2480f268cba8SShri Abhyankar 2481b65878eeSJunchao Zhang PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 24829566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 24839566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 2484f268cba8SShri Abhyankar tmp = a->solve_work; 2485f268cba8SShri Abhyankar 24869371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 24879371c9d4SSatish Balay r = rout; 24889371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 24899371c9d4SSatish Balay c = cout; 2490f268cba8SShri Abhyankar 2491f268cba8SShri Abhyankar /* forward solve the lower triangular */ 2492f268cba8SShri Abhyankar tmp[0] = b[r[0]]; 2493f268cba8SShri Abhyankar v = aa; 2494f268cba8SShri Abhyankar vi = aj; 2495f268cba8SShri Abhyankar for (i = 1; i < n; i++) { 2496f268cba8SShri Abhyankar nz = ai[i + 1] - ai[i]; 2497f268cba8SShri Abhyankar sum = b[r[i]]; 2498f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 2499f268cba8SShri Abhyankar tmp[i] = sum; 25009371c9d4SSatish Balay v += nz; 25019371c9d4SSatish Balay vi += nz; 2502f268cba8SShri Abhyankar } 2503f268cba8SShri Abhyankar 2504f268cba8SShri Abhyankar /* backward solve the upper triangular */ 2505f268cba8SShri Abhyankar for (i = n - 1; i >= 0; i--) { 25063c0119dfSShri Abhyankar v = aa + adiag[i + 1] + 1; 25073c0119dfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 2508f268cba8SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 2509f268cba8SShri Abhyankar sum = tmp[i]; 2510f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 2511f268cba8SShri Abhyankar x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 2512f268cba8SShri Abhyankar } 2513f268cba8SShri Abhyankar 25149566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 25159566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 2516b65878eeSJunchao Zhang PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 25179566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 25189566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 25199566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 25203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2521f268cba8SShri Abhyankar } 2522