xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 421480d92be24cdb9933c60510b8e175c0a8d034)
1c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
3c6db04a5SJed Brown #include <petscbt.h>
4c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
53b2fbd54SBarry Smith 
6d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
7d71ae5a4SJacob Faibussowitsch {
84ac6704cSBarry Smith   PetscFunctionBegin;
94ac6704cSBarry Smith   *type = MATSOLVERPETSC;
103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
114ac6704cSBarry Smith }
124ac6704cSBarry Smith 
13d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
14d71ae5a4SJacob Faibussowitsch {
15d0f46423SBarry Smith   PetscInt n = A->rmap->n;
16b24902e0SBarry Smith 
17b24902e0SBarry Smith   PetscFunctionBegin;
18891bceaeSHong Zhang #if defined(PETSC_USE_COMPLEX)
1903e5aca4SStefano Zampini   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
2003e5aca4SStefano Zampini     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
2103e5aca4SStefano Zampini     *B = NULL;
2203e5aca4SStefano Zampini     PetscFunctionReturn(PETSC_SUCCESS);
2303e5aca4SStefano Zampini   }
24891bceaeSHong Zhang #endif
2503e5aca4SStefano Zampini 
269566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
279566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, n, n, n, n));
28599ef60dSHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
299566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQAIJ));
302205254eSKarl Rupp 
3135233d99SShri Abhyankar     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
3235233d99SShri Abhyankar     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
332205254eSKarl Rupp 
349566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
359566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
369566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
379566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
38b24902e0SBarry Smith   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
399566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQSBAIJ));
409566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
412205254eSKarl Rupp 
4235233d99SShri Abhyankar     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
4335233d99SShri Abhyankar     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
449566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
459566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
46e32f2f54SBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
47d5f3da31SBarry Smith   (*B)->factortype = ftype;
4800c67f3bSHong Zhang 
499566063dSJacob Faibussowitsch   PetscCall(PetscFree((*B)->solvertype));
509566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
51f73b0415SBarry Smith   (*B)->canuseordering = PETSC_TRUE;
529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
54b24902e0SBarry Smith }
55b24902e0SBarry Smith 
56d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
57d71ae5a4SJacob Faibussowitsch {
58ce3d78c0SShri Abhyankar   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
59ce3d78c0SShri Abhyankar   IS                 isicol;
608758e1faSBarry Smith   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
618758e1faSBarry Smith   PetscInt           i, n = A->rmap->n;
628758e1faSBarry Smith   PetscInt          *bi, *bj;
63ce3d78c0SShri Abhyankar   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
64ce3d78c0SShri Abhyankar   PetscReal          f;
65ce3d78c0SShri Abhyankar   PetscInt           nlnk, *lnk, k, **bi_ptr;
660298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
67ce3d78c0SShri Abhyankar   PetscBT            lnkbt;
68*421480d9SBarry Smith   PetscBool          diagDense;
69ce3d78c0SShri Abhyankar 
70ce3d78c0SShri Abhyankar   PetscFunctionBegin;
7108401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
72*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
73*421480d9SBarry Smith   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
74ece542f9SHong Zhang 
759566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
769566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
779566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
78ce3d78c0SShri Abhyankar 
791bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
809f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
819566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
82a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
839b48462bSShri Abhyankar 
849b48462bSShri Abhyankar   /* linked list for storing column indices of the active row */
859b48462bSShri Abhyankar   nlnk = n + 1;
869566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
879b48462bSShri Abhyankar 
889566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
899b48462bSShri Abhyankar 
909b48462bSShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
919b48462bSShri Abhyankar   f = info->fill;
92aa91b3e7SRichard Tran Mills   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
939566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
949b48462bSShri Abhyankar   current_space = free_space;
959b48462bSShri Abhyankar 
969b48462bSShri Abhyankar   for (i = 0; i < n; i++) {
979b48462bSShri Abhyankar     /* copy previous fill into linked list */
989b48462bSShri Abhyankar     nzi   = 0;
999b48462bSShri Abhyankar     nnz   = ai[r[i] + 1] - ai[r[i]];
1009b48462bSShri Abhyankar     ajtmp = aj + ai[r[i]];
1019566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
1029b48462bSShri Abhyankar     nzi += nlnk;
1039b48462bSShri Abhyankar 
1049b48462bSShri Abhyankar     /* add pivot rows into linked list */
1059b48462bSShri Abhyankar     row = lnk[n];
1069b48462bSShri Abhyankar     while (row < i) {
1079b48462bSShri Abhyankar       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
1089b48462bSShri Abhyankar       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
1099566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
1109b48462bSShri Abhyankar       nzi += nlnk;
1119b48462bSShri Abhyankar       row = lnk[row];
1129b48462bSShri Abhyankar     }
1139b48462bSShri Abhyankar     bi[i + 1] = bi[i] + nzi;
1149b48462bSShri Abhyankar     im[i]     = nzi;
1159b48462bSShri Abhyankar 
1169b48462bSShri Abhyankar     /* mark bdiag */
1179b48462bSShri Abhyankar     nzbd = 0;
1189b48462bSShri Abhyankar     nnz  = nzi;
1199b48462bSShri Abhyankar     k    = lnk[n];
1209b48462bSShri Abhyankar     while (nnz-- && k < i) {
1219b48462bSShri Abhyankar       nzbd++;
1229b48462bSShri Abhyankar       k = lnk[k];
1239b48462bSShri Abhyankar     }
1249b48462bSShri Abhyankar     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
1259b48462bSShri Abhyankar 
1269b48462bSShri Abhyankar     /* if free space is not available, make more free space */
1279b48462bSShri Abhyankar     if (current_space->local_remaining < nzi) {
1282f18eb33SBarry Smith       /* estimated additional space needed */
1291df4278eSBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
1309566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1319b48462bSShri Abhyankar       reallocs++;
1329b48462bSShri Abhyankar     }
1339b48462bSShri Abhyankar 
1349b48462bSShri Abhyankar     /* copy data into free space, then initialize lnk */
1359566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
1362205254eSKarl Rupp 
1379b48462bSShri Abhyankar     bi_ptr[i] = current_space->array;
1389b48462bSShri Abhyankar     current_space->array += nzi;
1399b48462bSShri Abhyankar     current_space->local_used += nzi;
1409b48462bSShri Abhyankar     current_space->local_remaining -= nzi;
1419b48462bSShri Abhyankar   }
1429b48462bSShri Abhyankar 
1439566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
1449566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
1459b48462bSShri Abhyankar 
1469263d837SHong Zhang   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
14784648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
1489566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1499566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
1509566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bi_ptr, im));
1519b48462bSShri Abhyankar 
1529b48462bSShri Abhyankar   /* put together the new matrix */
1539566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
15457508eceSPierre Jolivet   b          = (Mat_SeqAIJ *)B->data;
1559b48462bSShri Abhyankar   b->free_ij = PETSC_TRUE;
1569f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1579f0612e4SBarry Smith   b->free_a = PETSC_TRUE;
1589b48462bSShri Abhyankar   b->j      = bj;
1599b48462bSShri Abhyankar   b->i      = bi;
1609b48462bSShri Abhyankar   b->diag   = bdiag;
161f4259b30SLisandro Dalcin   b->ilen   = NULL;
162f4259b30SLisandro Dalcin   b->imax   = NULL;
1639b48462bSShri Abhyankar   b->row    = isrow;
1649b48462bSShri Abhyankar   b->col    = iscol;
1659566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
1669566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
1679b48462bSShri Abhyankar   b->icol = isicol;
16884648c2dSPierre Jolivet   PetscCall(PetscMalloc1(n, &b->solve_work));
1699b48462bSShri Abhyankar 
1709b48462bSShri Abhyankar   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
1719b48462bSShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
1722205254eSKarl Rupp 
173d5f3da31SBarry Smith   B->factortype            = MAT_FACTOR_LU;
174f284f12aSHong Zhang   B->info.factor_mallocs   = reallocs;
175f284f12aSHong Zhang   B->info.fill_ratio_given = f;
1769b48462bSShri Abhyankar 
1779b48462bSShri Abhyankar   if (ai[n]) {
178f284f12aSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1799b48462bSShri Abhyankar   } else {
180f284f12aSHong Zhang     B->info.fill_ratio_needed = 0.0;
1819b48462bSShri Abhyankar   }
1829263d837SHong Zhang #if defined(PETSC_USE_INFO)
1839263d837SHong Zhang   if (ai[n] != 0) {
1849263d837SHong Zhang     PetscReal af = B->info.fill_ratio_needed;
1859566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1869566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
1879566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
1889566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
1899263d837SHong Zhang   } else {
1909566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
1919263d837SHong Zhang   }
1929263d837SHong Zhang #endif
19335233d99SShri Abhyankar   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1944d12350bSJunchao Zhang   if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1959566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
1963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1979b48462bSShri Abhyankar }
1989b48462bSShri Abhyankar 
1995b5bc046SBarry Smith /*
2005b5bc046SBarry Smith     Trouble in factorization, should we dump the original matrix?
2015b5bc046SBarry Smith */
202d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorDumpMatrix(Mat A)
203d71ae5a4SJacob Faibussowitsch {
204ace3abfcSBarry Smith   PetscBool flg = PETSC_FALSE;
2055b5bc046SBarry Smith 
2065b5bc046SBarry Smith   PetscFunctionBegin;
2079566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
2085b5bc046SBarry Smith   if (flg) {
2095b5bc046SBarry Smith     PetscViewer viewer;
2105b5bc046SBarry Smith     char        filename[PETSC_MAX_PATH_LEN];
2115b5bc046SBarry Smith 
2129566063dSJacob Faibussowitsch     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
2139566063dSJacob Faibussowitsch     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
2149566063dSJacob Faibussowitsch     PetscCall(MatView(A, viewer));
2159566063dSJacob Faibussowitsch     PetscCall(PetscViewerDestroy(&viewer));
2165b5bc046SBarry Smith   }
2173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2185b5bc046SBarry Smith }
2195b5bc046SBarry Smith 
220d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
221d71ae5a4SJacob Faibussowitsch {
222c9c72f8fSHong Zhang   Mat              C = B;
223c9c72f8fSHong Zhang   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
224c9c72f8fSHong Zhang   IS               isrow = b->row, isicol = b->icol;
225c9c72f8fSHong Zhang   const PetscInt  *r, *ic, *ics;
226bbd65245SShri Abhyankar   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
227bbd65245SShri Abhyankar   PetscInt         i, j, k, nz, nzL, row, *pj;
228bbd65245SShri Abhyankar   const PetscInt  *ajtmp, *bjtmp;
229bbd65245SShri Abhyankar   MatScalar       *rtmp, *pc, multiplier, *pv;
230b65878eeSJunchao Zhang   const MatScalar *aa, *v;
231b65878eeSJunchao Zhang   MatScalar       *ba;
232ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
233d90ac83dSHong Zhang   FactorShiftCtx   sctx;
2344f81c4b7SBarry Smith   const PetscInt  *ddiag;
235d4ad06f7SHong Zhang   PetscReal        rs;
236d4ad06f7SHong Zhang   MatScalar        d;
237d4ad06f7SHong Zhang 
238c9c72f8fSHong Zhang   PetscFunctionBegin;
239b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
240b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
241d6e5152cSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
2429566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
243d4ad06f7SHong Zhang 
244f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
245*421480d9SBarry Smith     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
246d4ad06f7SHong Zhang     sctx.shift_top = info->zeropivot;
247d4ad06f7SHong Zhang     for (i = 0; i < n; i++) {
248d4ad06f7SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
249*421480d9SBarry Smith       d  = aa[ddiag[i]];
250d4ad06f7SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
251d4ad06f7SHong Zhang       v  = aa + ai[i];
252d4ad06f7SHong Zhang       nz = ai[i + 1] - ai[i];
2532205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
254d4ad06f7SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
255d4ad06f7SHong Zhang     }
256d4ad06f7SHong Zhang     sctx.shift_top *= 1.1;
257d4ad06f7SHong Zhang     sctx.nshift_max = 5;
258d4ad06f7SHong Zhang     sctx.shift_lo   = 0.;
259d4ad06f7SHong Zhang     sctx.shift_hi   = 1.;
260d4ad06f7SHong Zhang   }
261d4ad06f7SHong Zhang 
2629566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
2639566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
2649566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
265c9c72f8fSHong Zhang   ics = ic;
266c9c72f8fSHong Zhang 
267d4ad06f7SHong Zhang   do {
26807b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
269c9c72f8fSHong Zhang     for (i = 0; i < n; i++) {
270c9c72f8fSHong Zhang       /* zero rtmp */
271c9c72f8fSHong Zhang       /* L part */
272c9c72f8fSHong Zhang       nz    = bi[i + 1] - bi[i];
273c9c72f8fSHong Zhang       bjtmp = bj + bi[i];
274c9c72f8fSHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
275c9c72f8fSHong Zhang 
276c9c72f8fSHong Zhang       /* U part */
27762fbd6afSShri Abhyankar       nz    = bdiag[i] - bdiag[i + 1];
27862fbd6afSShri Abhyankar       bjtmp = bj + bdiag[i + 1] + 1;
279813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
280813a1f2bSShri Abhyankar 
281813a1f2bSShri Abhyankar       /* load in initial (unfactored row) */
282813a1f2bSShri Abhyankar       nz    = ai[r[i] + 1] - ai[r[i]];
283813a1f2bSShri Abhyankar       ajtmp = aj + ai[r[i]];
284813a1f2bSShri Abhyankar       v     = aa + ai[r[i]];
285ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
286813a1f2bSShri Abhyankar       /* ZeropivotApply() */
28798a93161SHong Zhang       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
288813a1f2bSShri Abhyankar 
289813a1f2bSShri Abhyankar       /* elimination */
290813a1f2bSShri Abhyankar       bjtmp = bj + bi[i];
291813a1f2bSShri Abhyankar       row   = *bjtmp++;
292813a1f2bSShri Abhyankar       nzL   = bi[i + 1] - bi[i];
293f268cba8SShri Abhyankar       for (k = 0; k < nzL; k++) {
294813a1f2bSShri Abhyankar         pc = rtmp + row;
295813a1f2bSShri Abhyankar         if (*pc != 0.0) {
296b65878eeSJunchao Zhang           pv         = ba + bdiag[row];
297813a1f2bSShri Abhyankar           multiplier = *pc * (*pv);
298813a1f2bSShri Abhyankar           *pc        = multiplier;
2992205254eSKarl Rupp 
30062fbd6afSShri Abhyankar           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
301b65878eeSJunchao Zhang           pv = ba + bdiag[row + 1] + 1;
30262fbd6afSShri Abhyankar           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
3032205254eSKarl Rupp 
304813a1f2bSShri Abhyankar           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
3059566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
306813a1f2bSShri Abhyankar         }
307f268cba8SShri Abhyankar         row = *bjtmp++;
308813a1f2bSShri Abhyankar       }
309813a1f2bSShri Abhyankar 
310813a1f2bSShri Abhyankar       /* finished row so stick it into b->a */
311813a1f2bSShri Abhyankar       rs = 0.0;
312813a1f2bSShri Abhyankar       /* L part */
313b65878eeSJunchao Zhang       pv = ba + bi[i];
314813a1f2bSShri Abhyankar       pj = b->j + bi[i];
315813a1f2bSShri Abhyankar       nz = bi[i + 1] - bi[i];
316813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) {
3179371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
3189371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
319813a1f2bSShri Abhyankar       }
320813a1f2bSShri Abhyankar 
321813a1f2bSShri Abhyankar       /* U part */
322b65878eeSJunchao Zhang       pv = ba + bdiag[i + 1] + 1;
32362fbd6afSShri Abhyankar       pj = b->j + bdiag[i + 1] + 1;
32462fbd6afSShri Abhyankar       nz = bdiag[i] - bdiag[i + 1] - 1;
325813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) {
3269371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
3279371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
328813a1f2bSShri Abhyankar       }
329813a1f2bSShri Abhyankar 
330813a1f2bSShri Abhyankar       sctx.rs = rs;
331813a1f2bSShri Abhyankar       sctx.pv = rtmp[i];
3329566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
33307b50cabSHong Zhang       if (sctx.newshift) break; /* break for-loop */
33407b50cabSHong Zhang       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
335813a1f2bSShri Abhyankar 
336a5b23f4aSJose E. Roman       /* Mark diagonal and invert diagonal for simpler triangular solves */
337b65878eeSJunchao Zhang       pv  = ba + bdiag[i];
338813a1f2bSShri Abhyankar       *pv = 1.0 / rtmp[i];
339813a1f2bSShri Abhyankar 
340813a1f2bSShri Abhyankar     } /* endof for (i=0; i<n; i++) { */
341813a1f2bSShri Abhyankar 
3428ff23777SHong Zhang     /* MatPivotRefine() */
34307b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
344813a1f2bSShri Abhyankar       /*
345813a1f2bSShri Abhyankar        * if no shift in this attempt & shifting & started shifting & can refine,
346813a1f2bSShri Abhyankar        * then try lower shift
347813a1f2bSShri Abhyankar        */
348813a1f2bSShri Abhyankar       sctx.shift_hi       = sctx.shift_fraction;
349813a1f2bSShri Abhyankar       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
350813a1f2bSShri Abhyankar       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
35107b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
352813a1f2bSShri Abhyankar       sctx.nshift++;
353813a1f2bSShri Abhyankar     }
35407b50cabSHong Zhang   } while (sctx.newshift);
355813a1f2bSShri Abhyankar 
356b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
357b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
358b65878eeSJunchao Zhang 
3599566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
3609566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
3619566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
362d3ac4fa3SBarry Smith 
3639566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
3649566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
3654d12350bSJunchao Zhang   if (b->inode.size_csr) {
366abb87a52SBarry Smith     C->ops->solve = MatSolve_SeqAIJ_Inode;
367abb87a52SBarry Smith   } else if (row_identity && col_identity) {
36835233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
369813a1f2bSShri Abhyankar   } else {
37035233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ;
371813a1f2bSShri Abhyankar   }
37235233d99SShri Abhyankar   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
37335233d99SShri Abhyankar   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
37435233d99SShri Abhyankar   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
37535233d99SShri Abhyankar   C->ops->matsolve          = MatMatSolve_SeqAIJ;
376a3d9026eSPierre Jolivet   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
377813a1f2bSShri Abhyankar   C->assembled              = PETSC_TRUE;
378813a1f2bSShri Abhyankar   C->preallocated           = PETSC_TRUE;
3792205254eSKarl Rupp 
3809566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
381813a1f2bSShri Abhyankar 
38214d2772eSHong Zhang   /* MatShiftView(A,info,&sctx) */
383813a1f2bSShri Abhyankar   if (sctx.nshift) {
384f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
3859566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
386f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
3879566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
388f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
3899566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
390813a1f2bSShri Abhyankar     }
391813a1f2bSShri Abhyankar   }
3923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
393813a1f2bSShri Abhyankar }
394813a1f2bSShri Abhyankar 
395ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
396ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
397ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
398ff6a9541SJacob Faibussowitsch 
399d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
400d71ae5a4SJacob Faibussowitsch {
401719d5645SBarry Smith   Mat              C = B;
402416022c9SBarry Smith   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
40382bf6240SBarry Smith   IS               isrow = b->row, isicol = b->icol;
4045d0c19d7SBarry Smith   const PetscInt  *r, *ic, *ics;
405d278edc7SBarry Smith   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
406d278edc7SBarry Smith   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
407*421480d9SBarry Smith   const PetscInt  *ajtmp, *bjtmp, *ddiag, *pj;
408d278edc7SBarry Smith   MatScalar       *pv, *rtmp, *pc, multiplier, d;
409b65878eeSJunchao Zhang   const MatScalar *v, *aa;
410b65878eeSJunchao Zhang   MatScalar       *ba;
41133f9893dSHong Zhang   PetscReal        rs = 0.0;
412d90ac83dSHong Zhang   FactorShiftCtx   sctx;
413ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
414289bc588SBarry Smith 
4153a40ed3dSBarry Smith   PetscFunctionBegin;
416*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
417*421480d9SBarry Smith 
418b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
419b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
420504a3fadSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
4219566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
422ada7143aSSatish Balay 
423f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
424*421480d9SBarry Smith     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
4259f7d0b68SHong Zhang     sctx.shift_top = info->zeropivot;
4266cc28720Svictorle     for (i = 0; i < n; i++) {
4279f95998fSHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
428*421480d9SBarry Smith       d  = aa[ddiag[i]];
4291a890ab1SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
4309f7d0b68SHong Zhang       v  = aa + ai[i];
4319f7d0b68SHong Zhang       nz = ai[i + 1] - ai[i];
4322205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
4331a890ab1SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
4346cc28720Svictorle     }
435118f5789SBarry Smith     sctx.shift_top *= 1.1;
436d2276718SHong Zhang     sctx.nshift_max = 5;
437d2276718SHong Zhang     sctx.shift_lo   = 0.;
438d2276718SHong Zhang     sctx.shift_hi   = 1.;
439a0a356efSHong Zhang   }
440a0a356efSHong Zhang 
4419566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
4429566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
4439566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
444504a3fadSHong Zhang   ics = ic;
445504a3fadSHong Zhang 
446085256b3SBarry Smith   do {
44707b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
448289bc588SBarry Smith     for (i = 0; i < n; i++) {
449b3bf805bSHong Zhang       nz    = bi[i + 1] - bi[i];
450b3bf805bSHong Zhang       bjtmp = bj + bi[i];
4519f7d0b68SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
452289bc588SBarry Smith 
453289bc588SBarry Smith       /* load in initial (unfactored row) */
4549f7d0b68SHong Zhang       nz    = ai[r[i] + 1] - ai[r[i]];
4559f7d0b68SHong Zhang       ajtmp = aj + ai[r[i]];
4569f7d0b68SHong Zhang       v     = aa + ai[r[i]];
457ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
458d2276718SHong Zhang       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
459289bc588SBarry Smith 
460b3bf805bSHong Zhang       row = *bjtmp++;
461f2582111SSatish Balay       while (row < i) {
4628c37ef55SBarry Smith         pc = rtmp + row;
4638c37ef55SBarry Smith         if (*pc != 0.0) {
464*421480d9SBarry Smith           pv         = ba + ddiag[row];
465*421480d9SBarry Smith           pj         = b->j + ddiag[row] + 1;
46635aab85fSBarry Smith           multiplier = *pc / *pv++;
4678c37ef55SBarry Smith           *pc        = multiplier;
468*421480d9SBarry Smith           nz         = bi[row + 1] - ddiag[row] - 1;
4699f7d0b68SHong Zhang           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
4709566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
471289bc588SBarry Smith         }
472b3bf805bSHong Zhang         row = *bjtmp++;
473289bc588SBarry Smith       }
474416022c9SBarry Smith       /* finished row so stick it into b->a */
475b65878eeSJunchao Zhang       pv   = ba + bi[i];
476b3bf805bSHong Zhang       pj   = b->j + bi[i];
477b3bf805bSHong Zhang       nz   = bi[i + 1] - bi[i];
478*421480d9SBarry Smith       diag = ddiag[i] - bi[i];
479e57ff13aSBarry Smith       rs   = 0.0;
480d3d32019SBarry Smith       for (j = 0; j < nz; j++) {
4819f7d0b68SHong Zhang         pv[j] = rtmp[pj[j]];
4829f7d0b68SHong Zhang         rs += PetscAbsScalar(pv[j]);
483d3d32019SBarry Smith       }
484e57ff13aSBarry Smith       rs -= PetscAbsScalar(pv[diag]);
485d2276718SHong Zhang 
486d2276718SHong Zhang       sctx.rs = rs;
487d2276718SHong Zhang       sctx.pv = pv[diag];
4889566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
48907b50cabSHong Zhang       if (sctx.newshift) break;
490504a3fadSHong Zhang       pv[diag] = sctx.pv;
49135aab85fSBarry Smith     }
492d2276718SHong Zhang 
49307b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
4946cc28720Svictorle       /*
4956c037d1bSvictorle        * if no shift in this attempt & shifting & started shifting & can refine,
4966cc28720Svictorle        * then try lower shift
4976cc28720Svictorle        */
4980481f469SBarry Smith       sctx.shift_hi       = sctx.shift_fraction;
4990481f469SBarry Smith       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
5000481f469SBarry Smith       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
50107b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
502d2276718SHong Zhang       sctx.nshift++;
5036cc28720Svictorle     }
50407b50cabSHong Zhang   } while (sctx.newshift);
5050f11f9aeSSatish Balay 
506a5b23f4aSJose E. Roman   /* invert diagonal entries for simpler triangular solves */
507*421480d9SBarry Smith   for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]];
5089566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
5099566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
5109566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
511b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
512b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
513d3ac4fa3SBarry Smith 
5149566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
5159566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
5168d8c7c43SBarry Smith   if (row_identity && col_identity) {
517ad04f41aSHong Zhang     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
5188d8c7c43SBarry Smith   } else {
519ad04f41aSHong Zhang     C->ops->solve = MatSolve_SeqAIJ_inplace;
520db4efbfdSBarry Smith   }
521ad04f41aSHong Zhang   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
522ad04f41aSHong Zhang   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
523ad04f41aSHong Zhang   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
524ad04f41aSHong Zhang   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
525a3d9026eSPierre Jolivet   C->ops->matsolvetranspose = NULL;
5262205254eSKarl Rupp 
527c456f294SBarry Smith   C->assembled    = PETSC_TRUE;
5285c9eb25fSBarry Smith   C->preallocated = PETSC_TRUE;
5292205254eSKarl Rupp 
5309566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
531d2276718SHong Zhang   if (sctx.nshift) {
532f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
5339566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
534f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
5359566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
5366cc28720Svictorle     }
5370697b6caSHong Zhang   }
53857508eceSPierre Jolivet   C->ops->solve          = MatSolve_SeqAIJ_inplace;
53957508eceSPierre Jolivet   C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
5402205254eSKarl Rupp 
5419566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode(C));
5423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
543289bc588SBarry Smith }
5440889b5dcSvictorle 
545ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
546ff6a9541SJacob Faibussowitsch 
547137fb511SHong Zhang /*
548137fb511SHong Zhang    This routine implements inplace ILU(0) with row or/and column permutations.
549137fb511SHong Zhang    Input:
550137fb511SHong Zhang      A - original matrix
551137fb511SHong Zhang    Output;
552137fb511SHong Zhang      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
553137fb511SHong Zhang          a->j (col index) is permuted by the inverse of colperm, then sorted
554137fb511SHong Zhang          a->a reordered accordingly with a->j
555137fb511SHong Zhang          a->diag (ptr to diagonal elements) is updated.
556137fb511SHong Zhang */
557d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
558d71ae5a4SJacob Faibussowitsch {
559b051339dSHong Zhang   Mat_SeqAIJ     *a     = (Mat_SeqAIJ *)A->data;
560b051339dSHong Zhang   IS              isrow = a->row, isicol = a->icol;
5615d0c19d7SBarry Smith   const PetscInt *r, *ic, *ics;
5625d0c19d7SBarry Smith   PetscInt        i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
5635d0c19d7SBarry Smith   PetscInt       *ajtmp, nz, row;
564*421480d9SBarry Smith   PetscInt        nbdiag, *pj;
565a77337e4SBarry Smith   PetscScalar    *rtmp, *pc, multiplier, d;
566504a3fadSHong Zhang   MatScalar      *pv, *v;
567137fb511SHong Zhang   PetscReal       rs;
568d90ac83dSHong Zhang   FactorShiftCtx  sctx;
569b65878eeSJunchao Zhang   MatScalar      *aa, *vtmp;
570*421480d9SBarry Smith   PetscInt       *diag;
571137fb511SHong Zhang 
572137fb511SHong Zhang   PetscFunctionBegin;
57308401ef6SPierre Jolivet   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
574504a3fadSHong Zhang 
575*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL));
576b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArray(A, &aa));
577504a3fadSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
5789566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
579504a3fadSHong Zhang 
580504a3fadSHong Zhang   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
581*421480d9SBarry Smith     const PetscInt *ddiag;
582*421480d9SBarry Smith 
583*421480d9SBarry Smith     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
584504a3fadSHong Zhang     sctx.shift_top = info->zeropivot;
585504a3fadSHong Zhang     for (i = 0; i < n; i++) {
586504a3fadSHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
587*421480d9SBarry Smith       d    = aa[ddiag[i]];
588504a3fadSHong Zhang       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
589504a3fadSHong Zhang       vtmp = aa + ai[i];
590504a3fadSHong Zhang       nz   = ai[i + 1] - ai[i];
5912205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
592504a3fadSHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
593504a3fadSHong Zhang     }
594504a3fadSHong Zhang     sctx.shift_top *= 1.1;
595504a3fadSHong Zhang     sctx.nshift_max = 5;
596504a3fadSHong Zhang     sctx.shift_lo   = 0.;
597504a3fadSHong Zhang     sctx.shift_hi   = 1.;
598504a3fadSHong Zhang   }
599504a3fadSHong Zhang 
6009566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
6019566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
6029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
6039566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(rtmp, n + 1));
604b051339dSHong Zhang   ics = ic;
605137fb511SHong Zhang 
606504a3fadSHong Zhang #if defined(MV)
60775567043SBarry Smith   sctx.shift_top      = 0.;
608e60cf9a0SBarry Smith   sctx.nshift_max     = 0;
60975567043SBarry Smith   sctx.shift_lo       = 0.;
61075567043SBarry Smith   sctx.shift_hi       = 0.;
61175567043SBarry Smith   sctx.shift_fraction = 0.;
612137fb511SHong Zhang 
613f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
61475567043SBarry Smith     sctx.shift_top = 0.;
615137fb511SHong Zhang     for (i = 0; i < n; i++) {
616137fb511SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
617b65878eeSJunchao Zhang       d  = aa[diag[i]];
618137fb511SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
619b65878eeSJunchao Zhang       v  = aa + ai[i];
620b051339dSHong Zhang       nz = ai[i + 1] - ai[i];
6212205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
622137fb511SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
623137fb511SHong Zhang     }
624137fb511SHong Zhang     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
625137fb511SHong Zhang     sctx.shift_top *= 1.1;
626137fb511SHong Zhang     sctx.nshift_max = 5;
627137fb511SHong Zhang     sctx.shift_lo   = 0.;
628137fb511SHong Zhang     sctx.shift_hi   = 1.;
629137fb511SHong Zhang   }
630137fb511SHong Zhang 
63175567043SBarry Smith   sctx.shift_amount = 0.;
632137fb511SHong Zhang   sctx.nshift       = 0;
633504a3fadSHong Zhang #endif
634504a3fadSHong Zhang 
635137fb511SHong Zhang   do {
63607b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
637137fb511SHong Zhang     for (i = 0; i < n; i++) {
638b051339dSHong Zhang       /* load in initial unfactored row */
639b051339dSHong Zhang       nz    = ai[r[i] + 1] - ai[r[i]];
640b051339dSHong Zhang       ajtmp = aj + ai[r[i]];
641b65878eeSJunchao Zhang       v     = aa + ai[r[i]];
642137fb511SHong Zhang       /* sort permuted ajtmp and values v accordingly */
643b051339dSHong Zhang       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
6449566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
645137fb511SHong Zhang 
646b051339dSHong Zhang       diag[r[i]] = ai[r[i]];
647137fb511SHong Zhang       for (j = 0; j < nz; j++) {
648137fb511SHong Zhang         rtmp[ajtmp[j]] = v[j];
649b051339dSHong Zhang         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
650137fb511SHong Zhang       }
651137fb511SHong Zhang       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
652137fb511SHong Zhang 
653b051339dSHong Zhang       row = *ajtmp++;
654137fb511SHong Zhang       while (row < i) {
655137fb511SHong Zhang         pc = rtmp + row;
656137fb511SHong Zhang         if (*pc != 0.0) {
657b65878eeSJunchao Zhang           pv = aa + diag[r[row]];
658b051339dSHong Zhang           pj = aj + diag[r[row]] + 1;
659137fb511SHong Zhang 
660137fb511SHong Zhang           multiplier = *pc / *pv++;
661137fb511SHong Zhang           *pc        = multiplier;
662b051339dSHong Zhang           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
663b051339dSHong Zhang           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
6649566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
665137fb511SHong Zhang         }
666b051339dSHong Zhang         row = *ajtmp++;
667137fb511SHong Zhang       }
668b65878eeSJunchao Zhang       /* finished row so overwrite it onto aa */
669b65878eeSJunchao Zhang       pv     = aa + ai[r[i]];
670b051339dSHong Zhang       pj     = aj + ai[r[i]];
671b051339dSHong Zhang       nz     = ai[r[i] + 1] - ai[r[i]];
672b051339dSHong Zhang       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
673137fb511SHong Zhang 
674137fb511SHong Zhang       rs = 0.0;
675137fb511SHong Zhang       for (j = 0; j < nz; j++) {
676b051339dSHong Zhang         pv[j] = rtmp[pj[j]];
677b051339dSHong Zhang         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
678137fb511SHong Zhang       }
679137fb511SHong Zhang 
680137fb511SHong Zhang       sctx.rs = rs;
681b051339dSHong Zhang       sctx.pv = pv[nbdiag];
6829566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
68307b50cabSHong Zhang       if (sctx.newshift) break;
684504a3fadSHong Zhang       pv[nbdiag] = sctx.pv;
685137fb511SHong Zhang     }
686137fb511SHong Zhang 
68707b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
688137fb511SHong Zhang       /*
689137fb511SHong Zhang        * if no shift in this attempt & shifting & started shifting & can refine,
690137fb511SHong Zhang        * then try lower shift
691137fb511SHong Zhang        */
6920481f469SBarry Smith       sctx.shift_hi       = sctx.shift_fraction;
6930481f469SBarry Smith       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
6940481f469SBarry Smith       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
69507b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
696137fb511SHong Zhang       sctx.nshift++;
697137fb511SHong Zhang     }
69807b50cabSHong Zhang   } while (sctx.newshift);
699137fb511SHong Zhang 
700a5b23f4aSJose E. Roman   /* invert diagonal entries for simpler triangular solves */
701b65878eeSJunchao Zhang   for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];
702137fb511SHong Zhang 
703b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArray(A, &aa));
7049566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
7059566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
7069566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
7072205254eSKarl Rupp 
708b051339dSHong Zhang   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
709ad04f41aSHong Zhang   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
710ad04f41aSHong Zhang   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
711ad04f41aSHong Zhang   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
7122205254eSKarl Rupp 
713b051339dSHong Zhang   A->assembled    = PETSC_TRUE;
7145c9eb25fSBarry Smith   A->preallocated = PETSC_TRUE;
7152205254eSKarl Rupp 
7169566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(A->cmap->n));
717137fb511SHong Zhang   if (sctx.nshift) {
718f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
7199566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
720f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
7219566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
722137fb511SHong Zhang     }
723137fb511SHong Zhang   }
7243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
725137fb511SHong Zhang }
726137fb511SHong Zhang 
727d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
728d71ae5a4SJacob Faibussowitsch {
729416022c9SBarry Smith   Mat C;
730416022c9SBarry Smith 
7313a40ed3dSBarry Smith   PetscFunctionBegin;
7329566063dSJacob Faibussowitsch   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
7339566063dSJacob Faibussowitsch   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
7349566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(C, A, info));
7352205254eSKarl Rupp 
736db4efbfdSBarry Smith   A->ops->solve          = C->ops->solve;
737db4efbfdSBarry Smith   A->ops->solvetranspose = C->ops->solvetranspose;
7382205254eSKarl Rupp 
7399566063dSJacob Faibussowitsch   PetscCall(MatHeaderMerge(A, &C));
7403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
741da3a660dSBarry Smith }
7421d098868SHong Zhang 
743d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
744d71ae5a4SJacob Faibussowitsch {
745416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
746416022c9SBarry Smith   IS                 iscol = a->col, isrow = a->row;
7475d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
7485d0c19d7SBarry Smith   PetscInt           nz;
7495d0c19d7SBarry Smith   const PetscInt    *rout, *cout, *r, *c;
750dd6ea824SBarry Smith   PetscScalar       *x, *tmp, *tmps, sum;
751d9fead3dSBarry Smith   const PetscScalar *b;
752b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
753*421480d9SBarry Smith   const PetscInt    *adiag;
7548c37ef55SBarry Smith 
7553a40ed3dSBarry Smith   PetscFunctionBegin;
7563ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
75791bf9adeSBarry Smith 
758*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
759b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
7609566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
7619566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
762416022c9SBarry Smith   tmp = a->solve_work;
7638c37ef55SBarry Smith 
7649371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
7659371c9d4SSatish Balay   r = rout;
7669371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
7679371c9d4SSatish Balay   c = cout + (n - 1);
7688c37ef55SBarry Smith 
7698c37ef55SBarry Smith   /* forward solve the lower triangular */
7708c37ef55SBarry Smith   tmp[0] = b[*r++];
771010a20caSHong Zhang   tmps   = tmp;
7728c37ef55SBarry Smith   for (i = 1; i < n; i++) {
773010a20caSHong Zhang     v   = aa + ai[i];
774010a20caSHong Zhang     vi  = aj + ai[i];
775*421480d9SBarry Smith     nz  = adiag[i] - ai[i];
77653e7425aSBarry Smith     sum = b[*r++];
777003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
7788c37ef55SBarry Smith     tmp[i] = sum;
7798c37ef55SBarry Smith   }
7808c37ef55SBarry Smith 
7818c37ef55SBarry Smith   /* backward solve the upper triangular */
7828c37ef55SBarry Smith   for (i = n - 1; i >= 0; i--) {
783*421480d9SBarry Smith     v   = aa + adiag[i] + 1;
784*421480d9SBarry Smith     vi  = aj + adiag[i] + 1;
785*421480d9SBarry Smith     nz  = ai[i + 1] - adiag[i] - 1;
7868c37ef55SBarry Smith     sum = tmp[i];
787003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
788*421480d9SBarry Smith     x[*c--] = tmp[i] = sum * aa[adiag[i]];
7898c37ef55SBarry Smith   }
790b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
7919566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
7929566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
7939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
7949566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
7959566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
7963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7978c37ef55SBarry Smith }
798026e39d0SSatish Balay 
799ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
800d71ae5a4SJacob Faibussowitsch {
8012bebee5dSHong Zhang   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
8022bebee5dSHong Zhang   IS                 iscol = a->col, isrow = a->row;
8035d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
804910cf402Sprj-   PetscInt           nz, neq, ldb, ldx;
8055d0c19d7SBarry Smith   const PetscInt    *rout, *cout, *r, *c;
806910cf402Sprj-   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
807b65878eeSJunchao Zhang   const PetscScalar *b, *aa, *v;
808910cf402Sprj-   PetscBool          isdense;
809*421480d9SBarry Smith   const PetscInt    *adiag;
8102bebee5dSHong Zhang 
8112bebee5dSHong Zhang   PetscFunctionBegin;
8123ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
8139566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
81428b400f6SJacob Faibussowitsch   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
815f9fb9879SHong Zhang   if (X != B) {
8169566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
81728b400f6SJacob Faibussowitsch     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
818f9fb9879SHong Zhang   }
819*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
820b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
8219566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
8229566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B, &ldb));
8239566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &x));
8249566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(X, &ldx));
8259371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
8269371c9d4SSatish Balay   r = rout;
8279371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
8289371c9d4SSatish Balay   c = cout;
829a36b22ccSSatish Balay   for (neq = 0; neq < B->cmap->n; neq++) {
8302bebee5dSHong Zhang     /* forward solve the lower triangular */
8312bebee5dSHong Zhang     tmp[0] = b[r[0]];
8322bebee5dSHong Zhang     tmps   = tmp;
8332bebee5dSHong Zhang     for (i = 1; i < n; i++) {
8342bebee5dSHong Zhang       v   = aa + ai[i];
8352bebee5dSHong Zhang       vi  = aj + ai[i];
836*421480d9SBarry Smith       nz  = adiag[i] - ai[i];
8372bebee5dSHong Zhang       sum = b[r[i]];
838003131ecSBarry Smith       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
8392bebee5dSHong Zhang       tmp[i] = sum;
8402bebee5dSHong Zhang     }
8412bebee5dSHong Zhang     /* backward solve the upper triangular */
8422bebee5dSHong Zhang     for (i = n - 1; i >= 0; i--) {
843*421480d9SBarry Smith       v   = aa + adiag[i] + 1;
844*421480d9SBarry Smith       vi  = aj + adiag[i] + 1;
845*421480d9SBarry Smith       nz  = ai[i + 1] - adiag[i] - 1;
8462bebee5dSHong Zhang       sum = tmp[i];
847003131ecSBarry Smith       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
848*421480d9SBarry Smith       x[c[i]] = tmp[i] = sum * aa[adiag[i]];
8492bebee5dSHong Zhang     }
850910cf402Sprj-     b += ldb;
851910cf402Sprj-     x += ldx;
8522bebee5dSHong Zhang   }
853b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
8549566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
8559566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
8569566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
8579566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &x));
8589566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
8593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8602bebee5dSHong Zhang }
8612bebee5dSHong Zhang 
862d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
863d71ae5a4SJacob Faibussowitsch {
8649bd0847aSShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
8659bd0847aSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
866*421480d9SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
867*421480d9SBarry Smith   const PetscInt    *adiag;
868910cf402Sprj-   PetscInt           nz, neq, ldb, ldx;
8699bd0847aSShri Abhyankar   const PetscInt    *rout, *cout, *r, *c;
870910cf402Sprj-   PetscScalar       *x, *tmp = a->solve_work, sum;
871b65878eeSJunchao Zhang   const PetscScalar *b, *aa, *v;
872910cf402Sprj-   PetscBool          isdense;
8739bd0847aSShri Abhyankar 
8749bd0847aSShri Abhyankar   PetscFunctionBegin;
8753ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
8769566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
87728b400f6SJacob Faibussowitsch   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
878f9fb9879SHong Zhang   if (X != B) {
8799566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
88028b400f6SJacob Faibussowitsch     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
881f9fb9879SHong Zhang   }
882*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
883b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
8849566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
8859566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B, &ldb));
8869566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &x));
8879566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(X, &ldx));
8889371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
8899371c9d4SSatish Balay   r = rout;
8909371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
8919371c9d4SSatish Balay   c = cout;
8929bd0847aSShri Abhyankar   for (neq = 0; neq < B->cmap->n; neq++) {
8939bd0847aSShri Abhyankar     /* forward solve the lower triangular */
8949bd0847aSShri Abhyankar     tmp[0] = b[r[0]];
8959bd0847aSShri Abhyankar     v      = aa;
8969bd0847aSShri Abhyankar     vi     = aj;
8979bd0847aSShri Abhyankar     for (i = 1; i < n; i++) {
8989bd0847aSShri Abhyankar       nz  = ai[i + 1] - ai[i];
8999bd0847aSShri Abhyankar       sum = b[r[i]];
9009bd0847aSShri Abhyankar       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
9019bd0847aSShri Abhyankar       tmp[i] = sum;
9029371c9d4SSatish Balay       v += nz;
9039371c9d4SSatish Balay       vi += nz;
9049bd0847aSShri Abhyankar     }
9059bd0847aSShri Abhyankar     /* backward solve the upper triangular */
9069bd0847aSShri Abhyankar     for (i = n - 1; i >= 0; i--) {
9079bd0847aSShri Abhyankar       v   = aa + adiag[i + 1] + 1;
9089bd0847aSShri Abhyankar       vi  = aj + adiag[i + 1] + 1;
9099bd0847aSShri Abhyankar       nz  = adiag[i] - adiag[i + 1] - 1;
9109bd0847aSShri Abhyankar       sum = tmp[i];
9119bd0847aSShri Abhyankar       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
9129bd0847aSShri Abhyankar       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
9139bd0847aSShri Abhyankar     }
914910cf402Sprj-     b += ldb;
915910cf402Sprj-     x += ldx;
9169bd0847aSShri Abhyankar   }
917b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
9189566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
9199566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
9209566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
9219566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &x));
9229566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
9233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9249bd0847aSShri Abhyankar }
9259bd0847aSShri Abhyankar 
926a3d9026eSPierre Jolivet PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
927a3d9026eSPierre Jolivet {
928a3d9026eSPierre Jolivet   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
929a3d9026eSPierre Jolivet   IS                 iscol = a->col, isrow = a->row;
930*421480d9SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j;
931*421480d9SBarry Smith   const PetscInt    *adiag = a->diag;
932a3d9026eSPierre Jolivet   PetscInt           nz, neq, ldb, ldx;
933a3d9026eSPierre Jolivet   const PetscInt    *rout, *cout, *r, *c;
934a3d9026eSPierre Jolivet   PetscScalar       *x, *tmp = a->solve_work, s1;
935b65878eeSJunchao Zhang   const PetscScalar *b, *aa, *v;
936a3d9026eSPierre Jolivet   PetscBool          isdense;
937a3d9026eSPierre Jolivet 
938a3d9026eSPierre Jolivet   PetscFunctionBegin;
939a3d9026eSPierre Jolivet   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
940a3d9026eSPierre Jolivet   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
941a3d9026eSPierre Jolivet   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
942a3d9026eSPierre Jolivet   if (X != B) {
943a3d9026eSPierre Jolivet     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
944a3d9026eSPierre Jolivet     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
945a3d9026eSPierre Jolivet   }
946*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
947b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
948a3d9026eSPierre Jolivet   PetscCall(MatDenseGetArrayRead(B, &b));
949a3d9026eSPierre Jolivet   PetscCall(MatDenseGetLDA(B, &ldb));
950a3d9026eSPierre Jolivet   PetscCall(MatDenseGetArray(X, &x));
951a3d9026eSPierre Jolivet   PetscCall(MatDenseGetLDA(X, &ldx));
952a3d9026eSPierre Jolivet   PetscCall(ISGetIndices(isrow, &rout));
953a3d9026eSPierre Jolivet   r = rout;
954a3d9026eSPierre Jolivet   PetscCall(ISGetIndices(iscol, &cout));
955a3d9026eSPierre Jolivet   c = cout;
956a3d9026eSPierre Jolivet   for (neq = 0; neq < B->cmap->n; neq++) {
957a3d9026eSPierre Jolivet     /* copy the b into temp work space according to permutation */
958a3d9026eSPierre Jolivet     for (i = 0; i < n; i++) tmp[i] = b[c[i]];
959a3d9026eSPierre Jolivet 
960a3d9026eSPierre Jolivet     /* forward solve the U^T */
961a3d9026eSPierre Jolivet     for (i = 0; i < n; i++) {
962a3d9026eSPierre Jolivet       v  = aa + adiag[i + 1] + 1;
963a3d9026eSPierre Jolivet       vi = aj + adiag[i + 1] + 1;
964a3d9026eSPierre Jolivet       nz = adiag[i] - adiag[i + 1] - 1;
965a3d9026eSPierre Jolivet       s1 = tmp[i];
966a3d9026eSPierre Jolivet       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
967a3d9026eSPierre Jolivet       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
968a3d9026eSPierre Jolivet       tmp[i] = s1;
969a3d9026eSPierre Jolivet     }
970a3d9026eSPierre Jolivet 
971a3d9026eSPierre Jolivet     /* backward solve the L^T */
972a3d9026eSPierre Jolivet     for (i = n - 1; i >= 0; i--) {
973a3d9026eSPierre Jolivet       v  = aa + ai[i];
974a3d9026eSPierre Jolivet       vi = aj + ai[i];
975a3d9026eSPierre Jolivet       nz = ai[i + 1] - ai[i];
976a3d9026eSPierre Jolivet       s1 = tmp[i];
977a3d9026eSPierre Jolivet       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
978a3d9026eSPierre Jolivet     }
979a3d9026eSPierre Jolivet 
980a3d9026eSPierre Jolivet     /* copy tmp into x according to permutation */
981a3d9026eSPierre Jolivet     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
982a3d9026eSPierre Jolivet     b += ldb;
983a3d9026eSPierre Jolivet     x += ldx;
984a3d9026eSPierre Jolivet   }
985b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
986a3d9026eSPierre Jolivet   PetscCall(ISRestoreIndices(isrow, &rout));
987a3d9026eSPierre Jolivet   PetscCall(ISRestoreIndices(iscol, &cout));
988a3d9026eSPierre Jolivet   PetscCall(MatDenseRestoreArrayRead(B, &b));
989a3d9026eSPierre Jolivet   PetscCall(MatDenseRestoreArray(X, &x));
990a3d9026eSPierre Jolivet   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
991a3d9026eSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
992a3d9026eSPierre Jolivet }
993a3d9026eSPierre Jolivet 
994ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
995d71ae5a4SJacob Faibussowitsch {
996137fb511SHong Zhang   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
997137fb511SHong Zhang   IS                 iscol = a->col, isrow = a->row;
998*421480d9SBarry Smith   const PetscInt    *r, *c, *rout, *cout, *adiag;
9995d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
10005d0c19d7SBarry Smith   PetscInt           nz, row;
1001fdc842d1SBarry Smith   PetscScalar       *x, *tmp, *tmps, sum;
1002fdc842d1SBarry Smith   const PetscScalar *b;
1003b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
1004137fb511SHong Zhang 
1005137fb511SHong Zhang   PetscFunctionBegin;
10063ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1007137fb511SHong Zhang 
1008*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1009b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
10109566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
10119566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1012137fb511SHong Zhang   tmp = a->solve_work;
1013137fb511SHong Zhang 
10149371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
10159371c9d4SSatish Balay   r = rout;
10169371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
10179371c9d4SSatish Balay   c = cout + (n - 1);
1018137fb511SHong Zhang 
1019137fb511SHong Zhang   /* forward solve the lower triangular */
1020137fb511SHong Zhang   tmp[0] = b[*r++];
1021137fb511SHong Zhang   tmps   = tmp;
1022137fb511SHong Zhang   for (row = 1; row < n; row++) {
1023137fb511SHong Zhang     i   = rout[row]; /* permuted row */
1024137fb511SHong Zhang     v   = aa + ai[i];
1025137fb511SHong Zhang     vi  = aj + ai[i];
1026*421480d9SBarry Smith     nz  = adiag[i] - ai[i];
1027137fb511SHong Zhang     sum = b[*r++];
1028003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1029137fb511SHong Zhang     tmp[row] = sum;
1030137fb511SHong Zhang   }
1031137fb511SHong Zhang 
1032137fb511SHong Zhang   /* backward solve the upper triangular */
1033137fb511SHong Zhang   for (row = n - 1; row >= 0; row--) {
1034137fb511SHong Zhang     i   = rout[row]; /* permuted row */
1035*421480d9SBarry Smith     v   = aa + adiag[i] + 1;
1036*421480d9SBarry Smith     vi  = aj + adiag[i] + 1;
1037*421480d9SBarry Smith     nz  = ai[i + 1] - adiag[i] - 1;
1038137fb511SHong Zhang     sum = tmp[row];
1039003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1040*421480d9SBarry Smith     x[*c--] = tmp[row] = sum * aa[adiag[i]];
1041137fb511SHong Zhang   }
1042b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
10439566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
10449566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
10459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
10469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
10479566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
10483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1049137fb511SHong Zhang }
1050137fb511SHong Zhang 
1051c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1052ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1053d71ae5a4SJacob Faibussowitsch {
1054930ae53cSBarry Smith   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1055d0f46423SBarry Smith   PetscInt           n  = A->rmap->n;
1056*421480d9SBarry Smith   const PetscInt    *ai = a->i, *aj = a->j, *adiag;
1057356650c2SBarry Smith   PetscScalar       *x;
1058356650c2SBarry Smith   const PetscScalar *b;
1059b65878eeSJunchao Zhang   const MatScalar   *aa;
1060aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1061356650c2SBarry Smith   PetscInt         adiag_i, i, nz, ai_i;
1062b377110cSBarry Smith   const PetscInt  *vi;
1063dd6ea824SBarry Smith   const MatScalar *v;
1064dd6ea824SBarry Smith   PetscScalar      sum;
1065d85afc42SSatish Balay #endif
1066930ae53cSBarry Smith 
10673a40ed3dSBarry Smith   PetscFunctionBegin;
10683ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1069*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1070b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
10719566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
10729566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1073930ae53cSBarry Smith 
1074aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
10751c4feecaSSatish Balay   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
10761c4feecaSSatish Balay #else
1077930ae53cSBarry Smith   /* forward solve the lower triangular */
1078930ae53cSBarry Smith   x[0] = b[0];
1079930ae53cSBarry Smith   for (i = 1; i < n; i++) {
1080930ae53cSBarry Smith     ai_i = ai[i];
1081930ae53cSBarry Smith     v    = aa + ai_i;
1082930ae53cSBarry Smith     vi   = aj + ai_i;
1083930ae53cSBarry Smith     nz   = adiag[i] - ai_i;
1084930ae53cSBarry Smith     sum  = b[i];
1085003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1086930ae53cSBarry Smith     x[i] = sum;
1087930ae53cSBarry Smith   }
1088930ae53cSBarry Smith 
1089930ae53cSBarry Smith   /* backward solve the upper triangular */
1090930ae53cSBarry Smith   for (i = n - 1; i >= 0; i--) {
1091930ae53cSBarry Smith     adiag_i = adiag[i];
1092d708749eSBarry Smith     v       = aa + adiag_i + 1;
1093d708749eSBarry Smith     vi      = aj + adiag_i + 1;
1094930ae53cSBarry Smith     nz      = ai[i + 1] - adiag_i - 1;
1095930ae53cSBarry Smith     sum     = x[i];
1096003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1097930ae53cSBarry Smith     x[i] = sum * aa[adiag_i];
1098930ae53cSBarry Smith   }
10991c4feecaSSatish Balay #endif
11009566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1101b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
11029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
11039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
11043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1105930ae53cSBarry Smith }
1106930ae53cSBarry Smith 
1107ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1108d71ae5a4SJacob Faibussowitsch {
1109416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1110416022c9SBarry Smith   IS                 iscol = a->col, isrow = a->row;
111104fbf559SBarry Smith   PetscInt           i, n                  = A->rmap->n, j;
11125d0c19d7SBarry Smith   PetscInt           nz;
1113*421480d9SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
111404fbf559SBarry Smith   PetscScalar       *x, *tmp, sum;
111504fbf559SBarry Smith   const PetscScalar *b;
1116b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
1117da3a660dSBarry Smith 
11183a40ed3dSBarry Smith   PetscFunctionBegin;
11199566063dSJacob Faibussowitsch   if (yy != xx) PetscCall(VecCopy(yy, xx));
1120da3a660dSBarry Smith 
1121*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1122b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
11239566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
11249566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1125416022c9SBarry Smith   tmp = a->solve_work;
1126da3a660dSBarry Smith 
11279371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
11289371c9d4SSatish Balay   r = rout;
11299371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
11309371c9d4SSatish Balay   c = cout + (n - 1);
1131da3a660dSBarry Smith 
1132da3a660dSBarry Smith   /* forward solve the lower triangular */
1133da3a660dSBarry Smith   tmp[0] = b[*r++];
1134da3a660dSBarry Smith   for (i = 1; i < n; i++) {
1135010a20caSHong Zhang     v   = aa + ai[i];
1136010a20caSHong Zhang     vi  = aj + ai[i];
1137*421480d9SBarry Smith     nz  = adiag[i] - ai[i];
1138da3a660dSBarry Smith     sum = b[*r++];
113904fbf559SBarry Smith     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1140da3a660dSBarry Smith     tmp[i] = sum;
1141da3a660dSBarry Smith   }
1142da3a660dSBarry Smith 
1143da3a660dSBarry Smith   /* backward solve the upper triangular */
1144da3a660dSBarry Smith   for (i = n - 1; i >= 0; i--) {
1145*421480d9SBarry Smith     v   = aa + adiag[i] + 1;
1146*421480d9SBarry Smith     vi  = aj + adiag[i] + 1;
1147*421480d9SBarry Smith     nz  = ai[i + 1] - adiag[i] - 1;
1148da3a660dSBarry Smith     sum = tmp[i];
114904fbf559SBarry Smith     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1150*421480d9SBarry Smith     tmp[i] = sum * aa[adiag[i]];
1151da3a660dSBarry Smith     x[*c--] += tmp[i];
1152da3a660dSBarry Smith   }
1153da3a660dSBarry Smith 
1154b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
11559566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
11569566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
11579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
11589566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
11599566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
11603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1161da3a660dSBarry Smith }
116204fbf559SBarry Smith 
1163d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1164d71ae5a4SJacob Faibussowitsch {
11653c0119dfSShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
11663c0119dfSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
11673c0119dfSShri Abhyankar   PetscInt           i, n                  = A->rmap->n, j;
11683c0119dfSShri Abhyankar   PetscInt           nz;
1169*421480d9SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
11703c0119dfSShri Abhyankar   PetscScalar       *x, *tmp, sum;
11713c0119dfSShri Abhyankar   const PetscScalar *b;
1172b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
11733c0119dfSShri Abhyankar 
11743c0119dfSShri Abhyankar   PetscFunctionBegin;
11759566063dSJacob Faibussowitsch   if (yy != xx) PetscCall(VecCopy(yy, xx));
11763c0119dfSShri Abhyankar 
1177*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1178b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
11799566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
11809566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
11813c0119dfSShri Abhyankar   tmp = a->solve_work;
11823c0119dfSShri Abhyankar 
11839371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
11849371c9d4SSatish Balay   r = rout;
11859371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
11869371c9d4SSatish Balay   c = cout;
11873c0119dfSShri Abhyankar 
11883c0119dfSShri Abhyankar   /* forward solve the lower triangular */
11893c0119dfSShri Abhyankar   tmp[0] = b[r[0]];
11903c0119dfSShri Abhyankar   v      = aa;
11913c0119dfSShri Abhyankar   vi     = aj;
11923c0119dfSShri Abhyankar   for (i = 1; i < n; i++) {
11933c0119dfSShri Abhyankar     nz  = ai[i + 1] - ai[i];
11943c0119dfSShri Abhyankar     sum = b[r[i]];
11953c0119dfSShri Abhyankar     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
11963c0119dfSShri Abhyankar     tmp[i] = sum;
11972205254eSKarl Rupp     v += nz;
11982205254eSKarl Rupp     vi += nz;
11993c0119dfSShri Abhyankar   }
12003c0119dfSShri Abhyankar 
12013c0119dfSShri Abhyankar   /* backward solve the upper triangular */
12023c0119dfSShri Abhyankar   v  = aa + adiag[n - 1];
12033c0119dfSShri Abhyankar   vi = aj + adiag[n - 1];
12043c0119dfSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
12053c0119dfSShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
12063c0119dfSShri Abhyankar     sum = tmp[i];
12073c0119dfSShri Abhyankar     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
12083c0119dfSShri Abhyankar     tmp[i] = sum * v[nz];
12093c0119dfSShri Abhyankar     x[c[i]] += tmp[i];
12109371c9d4SSatish Balay     v += nz + 1;
12119371c9d4SSatish Balay     vi += nz + 1;
12123c0119dfSShri Abhyankar   }
12133c0119dfSShri Abhyankar 
1214b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
12159566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
12169566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
12179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12199566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
12203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
12213c0119dfSShri Abhyankar }
12223c0119dfSShri Abhyankar 
1223d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1224d71ae5a4SJacob Faibussowitsch {
1225416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
12262235e667SBarry Smith   IS                 iscol = a->col, isrow = a->row;
122704fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
122804fbf559SBarry Smith   PetscInt           i, n = A->rmap->n, j;
122904fbf559SBarry Smith   PetscInt           nz;
123004fbf559SBarry Smith   PetscScalar       *x, *tmp, s1;
1231b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
123204fbf559SBarry Smith   const PetscScalar *b;
1233da3a660dSBarry Smith 
12343a40ed3dSBarry Smith   PetscFunctionBegin;
1235b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
12369566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12379566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1238416022c9SBarry Smith   tmp = a->solve_work;
1239da3a660dSBarry Smith 
12409371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
12419371c9d4SSatish Balay   r = rout;
12429371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
12439371c9d4SSatish Balay   c = cout;
1244da3a660dSBarry Smith 
1245da3a660dSBarry Smith   /* copy the b into temp work space according to permutation */
12462235e667SBarry Smith   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1247da3a660dSBarry Smith 
1248da3a660dSBarry Smith   /* forward solve the U^T */
1249da3a660dSBarry Smith   for (i = 0; i < n; i++) {
1250010a20caSHong Zhang     v  = aa + diag[i];
1251010a20caSHong Zhang     vi = aj + diag[i] + 1;
1252f1af5d2fSBarry Smith     nz = ai[i + 1] - diag[i] - 1;
1253c38d4ed2SBarry Smith     s1 = tmp[i];
1254ef66eb69SBarry Smith     s1 *= (*v++); /* multiply by inverse of diagonal entry */
125504fbf559SBarry Smith     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1256c38d4ed2SBarry Smith     tmp[i] = s1;
1257da3a660dSBarry Smith   }
1258da3a660dSBarry Smith 
1259da3a660dSBarry Smith   /* backward solve the L^T */
1260da3a660dSBarry Smith   for (i = n - 1; i >= 0; i--) {
1261010a20caSHong Zhang     v  = aa + diag[i] - 1;
1262010a20caSHong Zhang     vi = aj + diag[i] - 1;
1263f1af5d2fSBarry Smith     nz = diag[i] - ai[i];
1264f1af5d2fSBarry Smith     s1 = tmp[i];
126504fbf559SBarry Smith     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1266da3a660dSBarry Smith   }
1267da3a660dSBarry Smith 
1268da3a660dSBarry Smith   /* copy tmp into x according to permutation */
1269591294e4SBarry Smith   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1270da3a660dSBarry Smith 
12719566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
12729566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1273b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
12749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
1276da3a660dSBarry Smith 
12779566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
12783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1279da3a660dSBarry Smith }
1280da3a660dSBarry Smith 
1281d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1282d71ae5a4SJacob Faibussowitsch {
1283d1fa9404SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1284d1fa9404SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
1285*421480d9SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1286d1fa9404SShri Abhyankar   PetscInt           i, n = A->rmap->n, j;
1287d1fa9404SShri Abhyankar   PetscInt           nz;
1288d1fa9404SShri Abhyankar   PetscScalar       *x, *tmp, s1;
1289b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
1290d1fa9404SShri Abhyankar   const PetscScalar *b;
1291d1fa9404SShri Abhyankar 
1292d1fa9404SShri Abhyankar   PetscFunctionBegin;
1293*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1294b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
12959566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12969566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1297d1fa9404SShri Abhyankar   tmp = a->solve_work;
1298d1fa9404SShri Abhyankar 
12999371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
13009371c9d4SSatish Balay   r = rout;
13019371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
13029371c9d4SSatish Balay   c = cout;
1303d1fa9404SShri Abhyankar 
1304d1fa9404SShri Abhyankar   /* copy the b into temp work space according to permutation */
1305d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1306d1fa9404SShri Abhyankar 
1307d1fa9404SShri Abhyankar   /* forward solve the U^T */
1308d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) {
1309d1fa9404SShri Abhyankar     v  = aa + adiag[i + 1] + 1;
1310d1fa9404SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
1311d1fa9404SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
1312d1fa9404SShri Abhyankar     s1 = tmp[i];
1313d1fa9404SShri Abhyankar     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1314d1fa9404SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1315d1fa9404SShri Abhyankar     tmp[i] = s1;
1316d1fa9404SShri Abhyankar   }
1317d1fa9404SShri Abhyankar 
1318d1fa9404SShri Abhyankar   /* backward solve the L^T */
1319d1fa9404SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1320d1fa9404SShri Abhyankar     v  = aa + ai[i];
1321d1fa9404SShri Abhyankar     vi = aj + ai[i];
1322d1fa9404SShri Abhyankar     nz = ai[i + 1] - ai[i];
1323d1fa9404SShri Abhyankar     s1 = tmp[i];
1324d1fa9404SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1325d1fa9404SShri Abhyankar   }
1326d1fa9404SShri Abhyankar 
1327d1fa9404SShri Abhyankar   /* copy tmp into x according to permutation */
1328d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1329d1fa9404SShri Abhyankar 
13309566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
13319566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1332b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
13339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
13349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
1335d1fa9404SShri Abhyankar 
13369566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
13373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1338d1fa9404SShri Abhyankar }
1339d1fa9404SShri Abhyankar 
1340d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1341d71ae5a4SJacob Faibussowitsch {
1342416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
13432235e667SBarry Smith   IS                 iscol = a->col, isrow = a->row;
134404fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
134504fbf559SBarry Smith   PetscInt           i, n = A->rmap->n, j;
134604fbf559SBarry Smith   PetscInt           nz;
134704fbf559SBarry Smith   PetscScalar       *x, *tmp, s1;
1348b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
134904fbf559SBarry Smith   const PetscScalar *b;
13506abc6512SBarry Smith 
13513a40ed3dSBarry Smith   PetscFunctionBegin;
13529566063dSJacob Faibussowitsch   if (zz != xx) PetscCall(VecCopy(zz, xx));
1353b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
13549566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
13559566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1356416022c9SBarry Smith   tmp = a->solve_work;
13576abc6512SBarry Smith 
13589371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
13599371c9d4SSatish Balay   r = rout;
13609371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
13619371c9d4SSatish Balay   c = cout;
13626abc6512SBarry Smith 
13636abc6512SBarry Smith   /* copy the b into temp work space according to permutation */
13642235e667SBarry Smith   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
13656abc6512SBarry Smith 
13666abc6512SBarry Smith   /* forward solve the U^T */
13676abc6512SBarry Smith   for (i = 0; i < n; i++) {
1368010a20caSHong Zhang     v  = aa + diag[i];
1369010a20caSHong Zhang     vi = aj + diag[i] + 1;
1370f1af5d2fSBarry Smith     nz = ai[i + 1] - diag[i] - 1;
137104fbf559SBarry Smith     s1 = tmp[i];
137204fbf559SBarry Smith     s1 *= (*v++); /* multiply by inverse of diagonal entry */
137304fbf559SBarry Smith     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
137404fbf559SBarry Smith     tmp[i] = s1;
13756abc6512SBarry Smith   }
13766abc6512SBarry Smith 
13776abc6512SBarry Smith   /* backward solve the L^T */
13786abc6512SBarry Smith   for (i = n - 1; i >= 0; i--) {
1379010a20caSHong Zhang     v  = aa + diag[i] - 1;
1380010a20caSHong Zhang     vi = aj + diag[i] - 1;
1381f1af5d2fSBarry Smith     nz = diag[i] - ai[i];
138204fbf559SBarry Smith     s1 = tmp[i];
138304fbf559SBarry Smith     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
13846abc6512SBarry Smith   }
13856abc6512SBarry Smith 
13866abc6512SBarry Smith   /* copy tmp into x according to permutation */
13876abc6512SBarry Smith   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
13886abc6512SBarry Smith 
13899566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
13909566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1391b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
13929566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
13939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
13946abc6512SBarry Smith 
13959566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
13963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1397da3a660dSBarry Smith }
139804fbf559SBarry Smith 
1399d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1400d71ae5a4SJacob Faibussowitsch {
1401533e6011SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1402533e6011SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
1403*421480d9SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1404533e6011SShri Abhyankar   PetscInt           i, n = A->rmap->n, j;
1405533e6011SShri Abhyankar   PetscInt           nz;
1406533e6011SShri Abhyankar   PetscScalar       *x, *tmp, s1;
1407b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
1408533e6011SShri Abhyankar   const PetscScalar *b;
1409533e6011SShri Abhyankar 
1410533e6011SShri Abhyankar   PetscFunctionBegin;
14119566063dSJacob Faibussowitsch   if (zz != xx) PetscCall(VecCopy(zz, xx));
1412*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1413b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
14149566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
14159566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1416533e6011SShri Abhyankar   tmp = a->solve_work;
1417533e6011SShri Abhyankar 
14189371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
14199371c9d4SSatish Balay   r = rout;
14209371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
14219371c9d4SSatish Balay   c = cout;
1422533e6011SShri Abhyankar 
1423533e6011SShri Abhyankar   /* copy the b into temp work space according to permutation */
1424533e6011SShri Abhyankar   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1425533e6011SShri Abhyankar 
1426533e6011SShri Abhyankar   /* forward solve the U^T */
1427533e6011SShri Abhyankar   for (i = 0; i < n; i++) {
1428533e6011SShri Abhyankar     v  = aa + adiag[i + 1] + 1;
1429533e6011SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
1430533e6011SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
1431533e6011SShri Abhyankar     s1 = tmp[i];
1432533e6011SShri Abhyankar     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1433533e6011SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1434533e6011SShri Abhyankar     tmp[i] = s1;
1435533e6011SShri Abhyankar   }
1436533e6011SShri Abhyankar 
1437533e6011SShri Abhyankar   /* backward solve the L^T */
1438533e6011SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1439533e6011SShri Abhyankar     v  = aa + ai[i];
1440533e6011SShri Abhyankar     vi = aj + ai[i];
1441533e6011SShri Abhyankar     nz = ai[i + 1] - ai[i];
1442533e6011SShri Abhyankar     s1 = tmp[i];
1443c5e3b2a3SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1444533e6011SShri Abhyankar   }
1445533e6011SShri Abhyankar 
1446533e6011SShri Abhyankar   /* copy tmp into x according to permutation */
1447533e6011SShri Abhyankar   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1448533e6011SShri Abhyankar 
14499566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
14509566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1451b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
14529566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
14539566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
1454533e6011SShri Abhyankar 
14559566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
14563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1457533e6011SShri Abhyankar }
1458533e6011SShri Abhyankar 
14598fc3a347SHong Zhang /*
14608a76ed4fSHong Zhang    ilu() under revised new data structure.
1461813a1f2bSShri Abhyankar    Factored arrays bj and ba are stored as
1462813a1f2bSShri Abhyankar      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1463813a1f2bSShri Abhyankar 
1464813a1f2bSShri Abhyankar    bi=fact->i is an array of size n+1, in which
1465baabb860SHong Zhang      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1466baabb860SHong Zhang      bi[n]:  points to L(n-1,n-1)+1
1467baabb860SHong Zhang 
1468813a1f2bSShri Abhyankar   bdiag=fact->diag is an array of size n+1,in which
1469baabb860SHong Zhang      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1470a24f213cSHong Zhang      bdiag[n]: points to entry of U(n-1,0)-1
1471813a1f2bSShri Abhyankar 
1472813a1f2bSShri Abhyankar    U(i,:) contains bdiag[i] as its last entry, i.e.,
1473813a1f2bSShri Abhyankar     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1474813a1f2bSShri Abhyankar */
1475d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1476d71ae5a4SJacob Faibussowitsch {
1477813a1f2bSShri Abhyankar   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1478*421480d9SBarry Smith   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag;
1479bbd65245SShri Abhyankar   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
14801df811f5SHong Zhang   IS             isicol;
1481813a1f2bSShri Abhyankar 
1482813a1f2bSShri Abhyankar   PetscFunctionBegin;
14839566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1484*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
14859566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
148657508eceSPierre Jolivet   b = (Mat_SeqAIJ *)fact->data;
1487813a1f2bSShri Abhyankar 
1488813a1f2bSShri Abhyankar   /* allocate matrix arrays for new data structure */
148984648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a));
149084648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j));
14919f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
14929f0612e4SBarry Smith   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
14939f0612e4SBarry Smith   b->free_a  = PETSC_TRUE;
14949f0612e4SBarry Smith   b->free_ij = PETSC_TRUE;
14952205254eSKarl Rupp 
1496aa624791SPierre Jolivet   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1497813a1f2bSShri Abhyankar   bdiag = b->diag;
1498813a1f2bSShri Abhyankar 
1499813a1f2bSShri Abhyankar   /* set bi and bj with new data structure */
1500813a1f2bSShri Abhyankar   bi = b->i;
1501813a1f2bSShri Abhyankar   bj = b->j;
1502813a1f2bSShri Abhyankar 
1503813a1f2bSShri Abhyankar   /* L part */
1504e60cf9a0SBarry Smith   bi[0] = 0;
1505813a1f2bSShri Abhyankar   for (i = 0; i < n; i++) {
1506813a1f2bSShri Abhyankar     nz        = adiag[i] - ai[i];
1507813a1f2bSShri Abhyankar     bi[i + 1] = bi[i] + nz;
1508813a1f2bSShri Abhyankar     aj        = a->j + ai[i];
1509ad540459SPierre Jolivet     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1510813a1f2bSShri Abhyankar   }
1511813a1f2bSShri Abhyankar 
1512813a1f2bSShri Abhyankar   /* U part */
151362fbd6afSShri Abhyankar   bdiag[n] = bi[n] - 1;
1514813a1f2bSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1515813a1f2bSShri Abhyankar     nz = ai[i + 1] - adiag[i] - 1;
1516813a1f2bSShri Abhyankar     aj = a->j + adiag[i] + 1;
1517ad540459SPierre Jolivet     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1518813a1f2bSShri Abhyankar     /* diag[i] */
1519bbd65245SShri Abhyankar     bj[k++]  = i;
1520a24f213cSHong Zhang     bdiag[i] = bdiag[i + 1] + nz + 1;
1521813a1f2bSShri Abhyankar   }
15221df811f5SHong Zhang 
1523d5f3da31SBarry Smith   fact->factortype             = MAT_FACTOR_ILU;
15241df811f5SHong Zhang   fact->info.factor_mallocs    = 0;
15251df811f5SHong Zhang   fact->info.fill_ratio_given  = info->fill;
15261df811f5SHong Zhang   fact->info.fill_ratio_needed = 1.0;
152735233d99SShri Abhyankar   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
15289566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
15291df811f5SHong Zhang 
153057508eceSPierre Jolivet   b       = (Mat_SeqAIJ *)fact->data;
15311df811f5SHong Zhang   b->row  = isrow;
15321df811f5SHong Zhang   b->col  = iscol;
15331df811f5SHong Zhang   b->icol = isicol;
153484648c2dSPierre Jolivet   PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work));
15359566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
15369566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
15373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1538813a1f2bSShri Abhyankar }
1539813a1f2bSShri Abhyankar 
1540d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1541d71ae5a4SJacob Faibussowitsch {
1542813a1f2bSShri Abhyankar   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1543813a1f2bSShri Abhyankar   IS                 isicol;
1544813a1f2bSShri Abhyankar   const PetscInt    *r, *ic;
15451df811f5SHong Zhang   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1546813a1f2bSShri Abhyankar   PetscInt          *bi, *cols, nnz, *cols_lvl;
1547813a1f2bSShri Abhyankar   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1548813a1f2bSShri Abhyankar   PetscInt           i, levels, diagonal_fill;
1549*421480d9SBarry Smith   PetscBool          col_identity, row_identity;
1550813a1f2bSShri Abhyankar   PetscReal          f;
15510298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1552813a1f2bSShri Abhyankar   PetscBT            lnkbt;
1553813a1f2bSShri Abhyankar   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
15540298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
15550298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1556*421480d9SBarry Smith   PetscBool          diagDense;
1557813a1f2bSShri Abhyankar 
1558813a1f2bSShri Abhyankar   PetscFunctionBegin;
155908401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1560*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
1561*421480d9SBarry Smith   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
1562ad04f41aSHong Zhang 
1563813a1f2bSShri Abhyankar   levels = (PetscInt)info->levels;
15649566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
15659566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &col_identity));
1566813a1f2bSShri Abhyankar   if (!levels && row_identity && col_identity) {
1567813a1f2bSShri Abhyankar     /* special case: ilu(0) with natural ordering */
15689566063dSJacob Faibussowitsch     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
15694d12350bSJunchao Zhang     if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
15703ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1571813a1f2bSShri Abhyankar   }
1572813a1f2bSShri Abhyankar 
15739566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
15749566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
15759566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
1576813a1f2bSShri Abhyankar 
15771bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
15789f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
15799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1580e60cf9a0SBarry Smith   bi[0] = bdiag[0] = 0;
15819566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1582813a1f2bSShri Abhyankar 
1583813a1f2bSShri Abhyankar   /* create a linked list for storing column indices of the active row */
1584813a1f2bSShri Abhyankar   nlnk = n + 1;
15859566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1586813a1f2bSShri Abhyankar 
1587813a1f2bSShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
15881df811f5SHong Zhang   f             = info->fill;
15891df811f5SHong Zhang   diagonal_fill = (PetscInt)info->diagonal_fill;
15909566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1591813a1f2bSShri Abhyankar   current_space = free_space;
15929566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1593813a1f2bSShri Abhyankar   current_space_lvl = free_space_lvl;
1594813a1f2bSShri Abhyankar   for (i = 0; i < n; i++) {
1595813a1f2bSShri Abhyankar     nzi = 0;
1596813a1f2bSShri Abhyankar     /* copy current row into linked list */
1597813a1f2bSShri Abhyankar     nnz = ai[r[i] + 1] - ai[r[i]];
159828b400f6SJacob Faibussowitsch     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1599813a1f2bSShri Abhyankar     cols   = aj + ai[r[i]];
1600813a1f2bSShri Abhyankar     lnk[i] = -1; /* marker to indicate if diagonal exists */
16019566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1602813a1f2bSShri Abhyankar     nzi += nlnk;
1603813a1f2bSShri Abhyankar 
1604813a1f2bSShri Abhyankar     /* make sure diagonal entry is included */
1605813a1f2bSShri Abhyankar     if (diagonal_fill && lnk[i] == -1) {
1606813a1f2bSShri Abhyankar       fm = n;
1607813a1f2bSShri Abhyankar       while (lnk[fm] < i) fm = lnk[fm];
1608813a1f2bSShri Abhyankar       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1609813a1f2bSShri Abhyankar       lnk[fm]    = i;
1610813a1f2bSShri Abhyankar       lnk_lvl[i] = 0;
16119371c9d4SSatish Balay       nzi++;
16129371c9d4SSatish Balay       dcount++;
1613813a1f2bSShri Abhyankar     }
1614813a1f2bSShri Abhyankar 
1615813a1f2bSShri Abhyankar     /* add pivot rows into the active row */
1616813a1f2bSShri Abhyankar     nzbd = 0;
1617813a1f2bSShri Abhyankar     prow = lnk[n];
1618813a1f2bSShri Abhyankar     while (prow < i) {
1619813a1f2bSShri Abhyankar       nnz      = bdiag[prow];
1620813a1f2bSShri Abhyankar       cols     = bj_ptr[prow] + nnz + 1;
1621813a1f2bSShri Abhyankar       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1622813a1f2bSShri Abhyankar       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
16239566063dSJacob Faibussowitsch       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1624813a1f2bSShri Abhyankar       nzi += nlnk;
1625813a1f2bSShri Abhyankar       prow = lnk[prow];
1626813a1f2bSShri Abhyankar       nzbd++;
1627813a1f2bSShri Abhyankar     }
1628813a1f2bSShri Abhyankar     bdiag[i]  = nzbd;
1629813a1f2bSShri Abhyankar     bi[i + 1] = bi[i] + nzi;
1630813a1f2bSShri Abhyankar     /* if free space is not available, make more free space */
1631813a1f2bSShri Abhyankar     if (current_space->local_remaining < nzi) {
1632f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
16339566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
16349566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1635813a1f2bSShri Abhyankar       reallocs++;
1636813a1f2bSShri Abhyankar     }
1637813a1f2bSShri Abhyankar 
1638813a1f2bSShri Abhyankar     /* copy data into free_space and free_space_lvl, then initialize lnk */
16399566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1640813a1f2bSShri Abhyankar     bj_ptr[i]    = current_space->array;
1641813a1f2bSShri Abhyankar     bjlvl_ptr[i] = current_space_lvl->array;
1642813a1f2bSShri Abhyankar 
1643813a1f2bSShri Abhyankar     /* make sure the active row i has diagonal entry */
164400045ab3SPierre Jolivet     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1645813a1f2bSShri Abhyankar 
1646813a1f2bSShri Abhyankar     current_space->array += nzi;
1647813a1f2bSShri Abhyankar     current_space->local_used += nzi;
1648813a1f2bSShri Abhyankar     current_space->local_remaining -= nzi;
1649813a1f2bSShri Abhyankar     current_space_lvl->array += nzi;
1650813a1f2bSShri Abhyankar     current_space_lvl->local_used += nzi;
1651813a1f2bSShri Abhyankar     current_space_lvl->local_remaining -= nzi;
1652813a1f2bSShri Abhyankar   }
1653813a1f2bSShri Abhyankar 
16549566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
16559566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
1656813a1f2bSShri Abhyankar   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
165784648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
16589566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1659813a1f2bSShri Abhyankar 
16609566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
16619566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
16629566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1663813a1f2bSShri Abhyankar 
1664813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO)
1665813a1f2bSShri Abhyankar   {
1666aef85c9fSShri Abhyankar     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
16679566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
16689566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
16699566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
16709566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
167148a46eb9SPierre Jolivet     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1672813a1f2bSShri Abhyankar   }
1673813a1f2bSShri Abhyankar #endif
1674813a1f2bSShri Abhyankar   /* put together the new matrix */
16759566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
167657508eceSPierre Jolivet   b          = (Mat_SeqAIJ *)fact->data;
1677813a1f2bSShri Abhyankar   b->free_ij = PETSC_TRUE;
16789f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
16799f0612e4SBarry Smith   b->free_a = PETSC_TRUE;
1680813a1f2bSShri Abhyankar   b->j      = bj;
1681813a1f2bSShri Abhyankar   b->i      = bi;
1682813a1f2bSShri Abhyankar   b->diag   = bdiag;
1683f4259b30SLisandro Dalcin   b->ilen   = NULL;
1684f4259b30SLisandro Dalcin   b->imax   = NULL;
1685813a1f2bSShri Abhyankar   b->row    = isrow;
1686813a1f2bSShri Abhyankar   b->col    = iscol;
16879566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
16889566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
1689813a1f2bSShri Abhyankar   b->icol = isicol;
16902205254eSKarl Rupp 
169184648c2dSPierre Jolivet   PetscCall(PetscMalloc1(n, &b->solve_work));
1692813a1f2bSShri Abhyankar   /* In b structure:  Free imax, ilen, old a, old j.
1693813a1f2bSShri Abhyankar      Allocate bdiag, solve_work, new a, new j */
1694f268cba8SShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
16952205254eSKarl Rupp 
169657508eceSPierre Jolivet   fact->info.factor_mallocs    = reallocs;
169757508eceSPierre Jolivet   fact->info.fill_ratio_given  = f;
169857508eceSPierre Jolivet   fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
169957508eceSPierre Jolivet   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
17004d12350bSJunchao Zhang   if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
17019566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
17023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1703813a1f2bSShri Abhyankar }
1704813a1f2bSShri Abhyankar 
1705d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
1706d71ae5a4SJacob Faibussowitsch {
17075f44c854SHong Zhang   Mat              C  = B;
17085f44c854SHong Zhang   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
17095f44c854SHong Zhang   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
17105f44c854SHong Zhang   IS               ip = b->row, iip = b->icol;
17115f44c854SHong Zhang   const PetscInt  *rip, *riip;
17125f44c854SHong Zhang   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
17135f44c854SHong Zhang   PetscInt        *ai = a->i, *aj = a->j;
17145f44c854SHong Zhang   PetscInt         k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1715b65878eeSJunchao Zhang   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
1716ace3abfcSBarry Smith   PetscBool        perm_identity;
1717d90ac83dSHong Zhang   FactorShiftCtx   sctx;
171898a93161SHong Zhang   PetscReal        rs;
1719b65878eeSJunchao Zhang   const MatScalar *aa, *v;
1720b65878eeSJunchao Zhang   MatScalar        d;
1721*421480d9SBarry Smith   const PetscInt  *adiag;
172298a93161SHong Zhang 
17235f44c854SHong Zhang   PetscFunctionBegin;
1724*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1725b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
172698a93161SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
17279566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
172898a93161SHong Zhang 
1729f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
173098a93161SHong Zhang     sctx.shift_top = info->zeropivot;
173198a93161SHong Zhang     for (i = 0; i < mbs; i++) {
173298a93161SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1733*421480d9SBarry Smith       d  = aa[adiag[i]];
173498a93161SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
173598a93161SHong Zhang       v  = aa + ai[i];
173698a93161SHong Zhang       nz = ai[i + 1] - ai[i];
17372205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
173898a93161SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
173998a93161SHong Zhang     }
174098a93161SHong Zhang     sctx.shift_top *= 1.1;
174198a93161SHong Zhang     sctx.nshift_max = 5;
174298a93161SHong Zhang     sctx.shift_lo   = 0.;
174398a93161SHong Zhang     sctx.shift_hi   = 1.;
174498a93161SHong Zhang   }
17455f44c854SHong Zhang 
17469566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
17479566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iip, &riip));
17485f44c854SHong Zhang 
17495f44c854SHong Zhang   /* allocate working arrays
17505f44c854SHong Zhang      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
17515f44c854SHong Zhang      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
17525f44c854SHong Zhang   */
17539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
17545f44c854SHong Zhang 
175598a93161SHong Zhang   do {
175607b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
175798a93161SHong Zhang 
1758c5546cabSHong Zhang     for (i = 0; i < mbs; i++) c2r[i] = mbs;
17592e987568SSatish Balay     if (mbs) il[0] = 0;
17605f44c854SHong Zhang 
17615f44c854SHong Zhang     for (k = 0; k < mbs; k++) {
17625f44c854SHong Zhang       /* zero rtmp */
17635f44c854SHong Zhang       nz    = bi[k + 1] - bi[k];
17645f44c854SHong Zhang       bjtmp = bj + bi[k];
17655f44c854SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
17665f44c854SHong Zhang 
17675f44c854SHong Zhang       /* load in initial unfactored row */
17685f44c854SHong Zhang       bval = ba + bi[k];
17699371c9d4SSatish Balay       jmin = ai[rip[k]];
17709371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
17715f44c854SHong Zhang       for (j = jmin; j < jmax; j++) {
17725f44c854SHong Zhang         col = riip[aj[j]];
17735f44c854SHong Zhang         if (col >= k) { /* only take upper triangular entry */
17745f44c854SHong Zhang           rtmp[col] = aa[j];
17755f44c854SHong Zhang           *bval++   = 0.0; /* for in-place factorization */
17765f44c854SHong Zhang         }
17775f44c854SHong Zhang       }
177898a93161SHong Zhang       /* shift the diagonal of the matrix: ZeropivotApply() */
177998a93161SHong Zhang       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
17805f44c854SHong Zhang 
17815f44c854SHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
17825f44c854SHong Zhang       dk = rtmp[k];
17835f44c854SHong Zhang       i  = c2r[k]; /* first row to be added to k_th row  */
17845f44c854SHong Zhang 
17855f44c854SHong Zhang       while (i < k) {
17865f44c854SHong Zhang         nexti = c2r[i]; /* next row to be added to k_th row */
17875f44c854SHong Zhang 
17885f44c854SHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
17895f44c854SHong Zhang         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
17905f44c854SHong Zhang         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
17915f44c854SHong Zhang         dk += uikdi * ba[ili];           /* update diag[k] */
17925f44c854SHong Zhang         ba[ili] = uikdi;                 /* -U(i,k) */
17935f44c854SHong Zhang 
17945f44c854SHong Zhang         /* add multiple of row i to k-th row */
17959371c9d4SSatish Balay         jmin = ili + 1;
17969371c9d4SSatish Balay         jmax = bi[i + 1];
17975f44c854SHong Zhang         if (jmin < jmax) {
17985f44c854SHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
17995f44c854SHong Zhang           /* update il and c2r for row i */
18005f44c854SHong Zhang           il[i]  = jmin;
18019371c9d4SSatish Balay           j      = bj[jmin];
18029371c9d4SSatish Balay           c2r[i] = c2r[j];
18039371c9d4SSatish Balay           c2r[j] = i;
18045f44c854SHong Zhang         }
18055f44c854SHong Zhang         i = nexti;
18065f44c854SHong Zhang       }
18075f44c854SHong Zhang 
18085f44c854SHong Zhang       /* copy data into U(k,:) */
180998a93161SHong Zhang       rs   = 0.0;
18109371c9d4SSatish Balay       jmin = bi[k];
18119371c9d4SSatish Balay       jmax = bi[k + 1] - 1;
18125f44c854SHong Zhang       if (jmin < jmax) {
18135f44c854SHong Zhang         for (j = jmin; j < jmax; j++) {
18149371c9d4SSatish Balay           col   = bj[j];
18159371c9d4SSatish Balay           ba[j] = rtmp[col];
18169371c9d4SSatish Balay           rs += PetscAbsScalar(ba[j]);
18175f44c854SHong Zhang         }
18185f44c854SHong Zhang         /* add the k-th row into il and c2r */
18195f44c854SHong Zhang         il[k]  = jmin;
18209371c9d4SSatish Balay         i      = bj[jmin];
18219371c9d4SSatish Balay         c2r[k] = c2r[i];
18229371c9d4SSatish Balay         c2r[i] = k;
18235f44c854SHong Zhang       }
182498a93161SHong Zhang 
182598a93161SHong Zhang       /* MatPivotCheck() */
182698a93161SHong Zhang       sctx.rs = rs;
182798a93161SHong Zhang       sctx.pv = dk;
18289566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
182907b50cabSHong Zhang       if (sctx.newshift) break;
183098a93161SHong Zhang       dk = sctx.pv;
183198a93161SHong Zhang 
183298a93161SHong Zhang       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
183398a93161SHong Zhang     }
183407b50cabSHong Zhang   } while (sctx.newshift);
18355f44c854SHong Zhang 
1836b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
18379566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, c2r));
18389566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
18399566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iip, &riip));
18405f44c854SHong Zhang 
18419566063dSJacob Faibussowitsch   PetscCall(ISIdentity(ip, &perm_identity));
18425f44c854SHong Zhang   if (perm_identity) {
184335233d99SShri Abhyankar     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
184435233d99SShri Abhyankar     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
18452169352eSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
18462169352eSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
18475f44c854SHong Zhang   } else {
184835233d99SShri Abhyankar     B->ops->solve          = MatSolve_SeqSBAIJ_1;
184935233d99SShri Abhyankar     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
185035233d99SShri Abhyankar     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
185135233d99SShri Abhyankar     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
18525f44c854SHong Zhang   }
18535f44c854SHong Zhang 
18545f44c854SHong Zhang   C->assembled    = PETSC_TRUE;
18555f44c854SHong Zhang   C->preallocated = PETSC_TRUE;
18562205254eSKarl Rupp 
18579566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->n));
185898a93161SHong Zhang 
185998a93161SHong Zhang   /* MatPivotView() */
186098a93161SHong Zhang   if (sctx.nshift) {
1861f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
18629566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1863f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
18649566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1865f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
18669566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
186798a93161SHong Zhang     }
186898a93161SHong Zhang   }
18693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18705f44c854SHong Zhang }
18715f44c854SHong Zhang 
1872d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
1873d71ae5a4SJacob Faibussowitsch {
1874719d5645SBarry Smith   Mat              C  = B;
18750968510aSHong Zhang   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
18762ed133dbSHong Zhang   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
18779bfd6278SHong Zhang   IS               ip = b->row, iip = b->icol;
18785d0c19d7SBarry Smith   const PetscInt  *rip, *riip;
1879c5546cabSHong Zhang   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
18802ed133dbSHong Zhang   PetscInt        *ai = a->i, *aj = a->j;
18812ed133dbSHong Zhang   PetscInt         k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1882b65878eeSJunchao Zhang   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
1883b65878eeSJunchao Zhang   const MatScalar *aa, *v;
1884ace3abfcSBarry Smith   PetscBool        perm_identity;
18850e95ead3SHong Zhang   FactorShiftCtx   sctx;
18860e95ead3SHong Zhang   PetscReal        rs;
1887b65878eeSJunchao Zhang   MatScalar        d;
1888*421480d9SBarry Smith   const PetscInt  *adiag;
1889d5d45c9bSBarry Smith 
1890a6175056SHong Zhang   PetscFunctionBegin;
1891*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1892b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
18930e95ead3SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
18949566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
18950e95ead3SHong Zhang 
18960e95ead3SHong Zhang   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
18970e95ead3SHong Zhang     sctx.shift_top = info->zeropivot;
18980e95ead3SHong Zhang     for (i = 0; i < mbs; i++) {
18990e95ead3SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1900*421480d9SBarry Smith       d  = aa[adiag[i]];
19010e95ead3SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
19020e95ead3SHong Zhang       v  = aa + ai[i];
19030e95ead3SHong Zhang       nz = ai[i + 1] - ai[i];
19042205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
19050e95ead3SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
19060e95ead3SHong Zhang     }
19070e95ead3SHong Zhang     sctx.shift_top *= 1.1;
19080e95ead3SHong Zhang     sctx.nshift_max = 5;
19090e95ead3SHong Zhang     sctx.shift_lo   = 0.;
19100e95ead3SHong Zhang     sctx.shift_hi   = 1.;
19110e95ead3SHong Zhang   }
1912ee45ca4aSHong Zhang 
19139566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
19149566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iip, &riip));
19152ed133dbSHong Zhang 
19162ed133dbSHong Zhang   /* initialization */
19179566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
19180e95ead3SHong Zhang 
19192ed133dbSHong Zhang   do {
192007b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
19210e95ead3SHong Zhang 
1922c5546cabSHong Zhang     for (i = 0; i < mbs; i++) jl[i] = mbs;
1923c5546cabSHong Zhang     il[0] = 0;
19242ed133dbSHong Zhang 
19252ed133dbSHong Zhang     for (k = 0; k < mbs; k++) {
1926c5546cabSHong Zhang       /* zero rtmp */
1927c5546cabSHong Zhang       nz    = bi[k + 1] - bi[k];
1928c5546cabSHong Zhang       bjtmp = bj + bi[k];
1929c5546cabSHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1930c5546cabSHong Zhang 
1931c05c3958SHong Zhang       bval = ba + bi[k];
19322ed133dbSHong Zhang       /* initialize k-th row by the perm[k]-th row of A */
19339371c9d4SSatish Balay       jmin = ai[rip[k]];
19349371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
19352ed133dbSHong Zhang       for (j = jmin; j < jmax; j++) {
19369bfd6278SHong Zhang         col = riip[aj[j]];
19372ed133dbSHong Zhang         if (col >= k) { /* only take upper triangular entry */
19382ed133dbSHong Zhang           rtmp[col] = aa[j];
1939c05c3958SHong Zhang           *bval++   = 0.0; /* for in-place factorization */
19402ed133dbSHong Zhang         }
19412ed133dbSHong Zhang       }
194239e3d630SHong Zhang       /* shift the diagonal of the matrix */
1943540703acSHong Zhang       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
19442ed133dbSHong Zhang 
19452ed133dbSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
19462ed133dbSHong Zhang       dk = rtmp[k];
19472ed133dbSHong Zhang       i  = jl[k]; /* first row to be added to k_th row  */
19482ed133dbSHong Zhang 
19492ed133dbSHong Zhang       while (i < k) {
19502ed133dbSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
19512ed133dbSHong Zhang 
19522ed133dbSHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
19532ed133dbSHong Zhang         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
19542ed133dbSHong Zhang         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
19552ed133dbSHong Zhang         dk += uikdi * ba[ili];
19562ed133dbSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
19572ed133dbSHong Zhang 
19582ed133dbSHong Zhang         /* add multiple of row i to k-th row */
19599371c9d4SSatish Balay         jmin = ili + 1;
19609371c9d4SSatish Balay         jmax = bi[i + 1];
19612ed133dbSHong Zhang         if (jmin < jmax) {
19622ed133dbSHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
19632ed133dbSHong Zhang           /* update il and jl for row i */
19642ed133dbSHong Zhang           il[i] = jmin;
19659371c9d4SSatish Balay           j     = bj[jmin];
19669371c9d4SSatish Balay           jl[i] = jl[j];
19679371c9d4SSatish Balay           jl[j] = i;
19682ed133dbSHong Zhang         }
19692ed133dbSHong Zhang         i = nexti;
19702ed133dbSHong Zhang       }
19712ed133dbSHong Zhang 
19720697b6caSHong Zhang       /* shift the diagonals when zero pivot is detected */
19730697b6caSHong Zhang       /* compute rs=sum of abs(off-diagonal) */
19740697b6caSHong Zhang       rs   = 0.0;
19753c9e8598SHong Zhang       jmin = bi[k] + 1;
19763c9e8598SHong Zhang       nz   = bi[k + 1] - jmin;
19773c9e8598SHong Zhang       bcol = bj + jmin;
1978ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
1979540703acSHong Zhang 
1980540703acSHong Zhang       sctx.rs = rs;
1981540703acSHong Zhang       sctx.pv = dk;
19829566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
198307b50cabSHong Zhang       if (sctx.newshift) break;
19840e95ead3SHong Zhang       dk = sctx.pv;
19853c9e8598SHong Zhang 
19863c9e8598SHong Zhang       /* copy data into U(k,:) */
198739e3d630SHong Zhang       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
19889371c9d4SSatish Balay       jmin      = bi[k] + 1;
19899371c9d4SSatish Balay       jmax      = bi[k + 1];
199039e3d630SHong Zhang       if (jmin < jmax) {
199139e3d630SHong Zhang         for (j = jmin; j < jmax; j++) {
19929371c9d4SSatish Balay           col   = bj[j];
19939371c9d4SSatish Balay           ba[j] = rtmp[col];
19943c9e8598SHong Zhang         }
199539e3d630SHong Zhang         /* add the k-th row into il and jl */
19963c9e8598SHong Zhang         il[k] = jmin;
19979371c9d4SSatish Balay         i     = bj[jmin];
19989371c9d4SSatish Balay         jl[k] = jl[i];
19999371c9d4SSatish Balay         jl[i] = k;
20003c9e8598SHong Zhang       }
20013c9e8598SHong Zhang     }
200207b50cabSHong Zhang   } while (sctx.newshift);
20030e95ead3SHong Zhang 
20049566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, jl));
20059566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
20069566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iip, &riip));
2007b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2008db4efbfdSBarry Smith 
20099566063dSJacob Faibussowitsch   PetscCall(ISIdentity(ip, &perm_identity));
2010db4efbfdSBarry Smith   if (perm_identity) {
20110a3c351aSHong Zhang     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
20120a3c351aSHong Zhang     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
20130a3c351aSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
20140a3c351aSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2015db4efbfdSBarry Smith   } else {
20160a3c351aSHong Zhang     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
20170a3c351aSHong Zhang     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
20180a3c351aSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
20190a3c351aSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2020db4efbfdSBarry Smith   }
2021db4efbfdSBarry Smith 
20223c9e8598SHong Zhang   C->assembled    = PETSC_TRUE;
20233c9e8598SHong Zhang   C->preallocated = PETSC_TRUE;
20242205254eSKarl Rupp 
20259566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->n));
2026540703acSHong Zhang   if (sctx.nshift) {
2027f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
20289566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2029f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
20309566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
20310697b6caSHong Zhang     }
20323c9e8598SHong Zhang   }
20333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20343c9e8598SHong Zhang }
2035a6175056SHong Zhang 
20368a76ed4fSHong Zhang /*
20378a76ed4fSHong Zhang    icc() under revised new data structure.
20388a76ed4fSHong Zhang    Factored arrays bj and ba are stored as
20398a76ed4fSHong Zhang      U(0,:),...,U(i,:),U(n-1,:)
20408a76ed4fSHong Zhang 
20418a76ed4fSHong Zhang    ui=fact->i is an array of size n+1, in which
20428a76ed4fSHong Zhang    ui+
20438a76ed4fSHong Zhang      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
20448a76ed4fSHong Zhang      ui[n]:  points to U(n-1,n-1)+1
20458a76ed4fSHong Zhang 
20468a76ed4fSHong Zhang   udiag=fact->diag is an array of size n,in which
20478a76ed4fSHong Zhang      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
20488a76ed4fSHong Zhang 
20498a76ed4fSHong Zhang    U(i,:) contains udiag[i] as its last entry, i.e.,
20508a76ed4fSHong Zhang     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
20518a76ed4fSHong Zhang */
20528a76ed4fSHong Zhang 
2053d71ae5a4SJacob Faibussowitsch PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2054d71ae5a4SJacob Faibussowitsch {
20550968510aSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2056ed59401aSHong Zhang   Mat_SeqSBAIJ      *b;
2057*421480d9SBarry Smith   PetscBool          perm_identity;
20585f44c854SHong Zhang   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2059*421480d9SBarry Smith   const PetscInt    *rip, *riip, *adiag;
2060ed59401aSHong Zhang   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2061*421480d9SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
20625a8e39fbSHong Zhang   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2063ed59401aSHong Zhang   PetscReal          fill = info->fill, levels = info->levels;
20640298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
20650298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
20667a48dd6fSHong Zhang   PetscBT            lnkbt;
2067b635d36bSHong Zhang   IS                 iperm;
2068*421480d9SBarry Smith   PetscBool          diagDense;
2069a6175056SHong Zhang 
2070a6175056SHong Zhang   PetscFunctionBegin;
207108401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2072*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
2073*421480d9SBarry Smith   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
20749566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
20759566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
20767a48dd6fSHong Zhang 
20779f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
20789566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
207939e3d630SHong Zhang   ui[0] = 0;
208039e3d630SHong Zhang 
20818a76ed4fSHong Zhang   /* ICC(0) without matrix ordering: simply rearrange column indices */
2082622e2028SHong Zhang   if (!levels && perm_identity) {
20835f44c854SHong Zhang     for (i = 0; i < am; i++) {
2084*421480d9SBarry Smith       ncols     = ai[i + 1] - adiag[i];
2085c5546cabSHong Zhang       ui[i + 1] = ui[i] + ncols;
2086c5546cabSHong Zhang       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2087dc1e2a3fSHong Zhang     }
20889566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2089dc1e2a3fSHong Zhang     cols = uj;
2090dc1e2a3fSHong Zhang     for (i = 0; i < am; i++) {
2091*421480d9SBarry Smith       aj    = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2092*421480d9SBarry Smith       ncols = ai[i + 1] - adiag[i] - 1;
20935f44c854SHong Zhang       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2094910cf402Sprj-       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
20955f44c854SHong Zhang     }
20965f44c854SHong Zhang   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
20979566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(iperm, &riip));
20989566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(perm, &rip));
20995f44c854SHong Zhang 
21005f44c854SHong Zhang     /* initialization */
21019566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(am + 1, &ajtmp));
21025f44c854SHong Zhang 
21035f44c854SHong Zhang     /* jl: linked list for storing indices of the pivot rows
21045f44c854SHong Zhang        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
21059566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
21065f44c854SHong Zhang     for (i = 0; i < am; i++) {
21079371c9d4SSatish Balay       jl[i] = am;
21089371c9d4SSatish Balay       il[i] = 0;
21095f44c854SHong Zhang     }
21105f44c854SHong Zhang 
21115f44c854SHong Zhang     /* create and initialize a linked list for storing column indices of the active row k */
21125f44c854SHong Zhang     nlnk = am + 1;
21139566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
21145f44c854SHong Zhang 
211595b5ac22SHong Zhang     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
21169566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
21175f44c854SHong Zhang     current_space = free_space;
21189566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
21195f44c854SHong Zhang     current_space_lvl = free_space_lvl;
21205f44c854SHong Zhang 
21215f44c854SHong Zhang     for (k = 0; k < am; k++) { /* for each active row k */
21225f44c854SHong Zhang       /* initialize lnk by the column indices of row rip[k] of A */
21235f44c854SHong Zhang       nzk   = 0;
21245f44c854SHong Zhang       ncols = ai[rip[k] + 1] - ai[rip[k]];
212528b400f6SJacob Faibussowitsch       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
21265f44c854SHong Zhang       ncols_upper = 0;
21275f44c854SHong Zhang       for (j = 0; j < ncols; j++) {
21285f44c854SHong Zhang         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
21295f44c854SHong Zhang         if (riip[i] >= k) {         /* only take upper triangular entry */
21305f44c854SHong Zhang           ajtmp[ncols_upper] = i;
21315f44c854SHong Zhang           ncols_upper++;
21325f44c854SHong Zhang         }
21335f44c854SHong Zhang       }
21349566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
21355f44c854SHong Zhang       nzk += nlnk;
21365f44c854SHong Zhang 
21375f44c854SHong Zhang       /* update lnk by computing fill-in for each pivot row to be merged in */
21385f44c854SHong Zhang       prow = jl[k]; /* 1st pivot row */
21395f44c854SHong Zhang 
21405f44c854SHong Zhang       while (prow < k) {
21415f44c854SHong Zhang         nextprow = jl[prow];
21425f44c854SHong Zhang 
21435f44c854SHong Zhang         /* merge prow into k-th row */
21445f44c854SHong Zhang         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
21455f44c854SHong Zhang         jmax  = ui[prow + 1];
21465f44c854SHong Zhang         ncols = jmax - jmin;
21475f44c854SHong Zhang         i     = jmin - ui[prow];
21485f44c854SHong Zhang         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
21495f44c854SHong Zhang         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
21505f44c854SHong Zhang         j     = *(uj - 1);
21519566063dSJacob Faibussowitsch         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
21525f44c854SHong Zhang         nzk += nlnk;
21535f44c854SHong Zhang 
21545f44c854SHong Zhang         /* update il and jl for prow */
21555f44c854SHong Zhang         if (jmin < jmax) {
21565f44c854SHong Zhang           il[prow] = jmin;
21579371c9d4SSatish Balay           j        = *cols;
21589371c9d4SSatish Balay           jl[prow] = jl[j];
21599371c9d4SSatish Balay           jl[j]    = prow;
21605f44c854SHong Zhang         }
21615f44c854SHong Zhang         prow = nextprow;
21625f44c854SHong Zhang       }
21635f44c854SHong Zhang 
21645f44c854SHong Zhang       /* if free space is not available, make more free space */
21655f44c854SHong Zhang       if (current_space->local_remaining < nzk) {
21665f44c854SHong Zhang         i = am - k + 1;                                    /* num of unfactored rows */
2167f91af8c7SBarry Smith         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
21689566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space));
21699566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
21705f44c854SHong Zhang         reallocs++;
21715f44c854SHong Zhang       }
21725f44c854SHong Zhang 
21735f44c854SHong Zhang       /* copy data into free_space and free_space_lvl, then initialize lnk */
217408401ef6SPierre Jolivet       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
21759566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
21765f44c854SHong Zhang 
21775f44c854SHong Zhang       /* add the k-th row into il and jl */
21785f44c854SHong Zhang       if (nzk > 1) {
21795f44c854SHong Zhang         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
21809371c9d4SSatish Balay         jl[k] = jl[i];
21819371c9d4SSatish Balay         jl[i] = k;
21825f44c854SHong Zhang         il[k] = ui[k] + 1;
21835f44c854SHong Zhang       }
21845f44c854SHong Zhang       uj_ptr[k]     = current_space->array;
21855f44c854SHong Zhang       uj_lvl_ptr[k] = current_space_lvl->array;
21865f44c854SHong Zhang 
21875f44c854SHong Zhang       current_space->array += nzk;
21885f44c854SHong Zhang       current_space->local_used += nzk;
21895f44c854SHong Zhang       current_space->local_remaining -= nzk;
21905f44c854SHong Zhang 
21915f44c854SHong Zhang       current_space_lvl->array += nzk;
21925f44c854SHong Zhang       current_space_lvl->local_used += nzk;
21935f44c854SHong Zhang       current_space_lvl->local_remaining -= nzk;
21945f44c854SHong Zhang 
21955f44c854SHong Zhang       ui[k + 1] = ui[k] + nzk;
21965f44c854SHong Zhang     }
21975f44c854SHong Zhang 
21989566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(perm, &rip));
21999566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(iperm, &riip));
22009566063dSJacob Faibussowitsch     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
22019566063dSJacob Faibussowitsch     PetscCall(PetscFree(ajtmp));
22025f44c854SHong Zhang 
22039263d837SHong Zhang     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
22049f0612e4SBarry Smith     PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
22059566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
22069566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
22079566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
22085f44c854SHong Zhang 
22095f44c854SHong Zhang   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
22105f44c854SHong Zhang 
22115f44c854SHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
221257508eceSPierre Jolivet   b          = (Mat_SeqSBAIJ *)fact->data;
22139f0612e4SBarry Smith   b->free_ij = PETSC_TRUE;
221484648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
22159f0612e4SBarry Smith   b->free_a = PETSC_TRUE;
22165f44c854SHong Zhang   b->j      = uj;
22175f44c854SHong Zhang   b->i      = ui;
22185f44c854SHong Zhang   b->diag   = udiag;
2219f4259b30SLisandro Dalcin   b->ilen   = NULL;
2220f4259b30SLisandro Dalcin   b->imax   = NULL;
22215f44c854SHong Zhang   b->row    = perm;
22225f44c854SHong Zhang   b->col    = perm;
22239566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
22249566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
22255f44c854SHong Zhang   b->icol          = iperm;
22265f44c854SHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
22272205254eSKarl Rupp 
222884648c2dSPierre Jolivet   PetscCall(PetscMalloc1(am, &b->solve_work));
22292205254eSKarl Rupp 
22305f44c854SHong Zhang   b->maxnz = b->nz = ui[am];
22315f44c854SHong Zhang 
2232f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2233f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
22345f44c854SHong Zhang   if (ai[am] != 0) {
22356a69fef8SHong Zhang     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
223695b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
22375f44c854SHong Zhang   } else {
2238f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
22395f44c854SHong Zhang   }
22409263d837SHong Zhang #if defined(PETSC_USE_INFO)
22419263d837SHong Zhang   if (ai[am] != 0) {
22429263d837SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
22439566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
22449566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
22459566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
22469263d837SHong Zhang   } else {
22479566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
22489263d837SHong Zhang   }
22499263d837SHong Zhang #endif
225035233d99SShri Abhyankar   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
22513ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
22525f44c854SHong Zhang }
22535f44c854SHong Zhang 
2254d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2255d71ae5a4SJacob Faibussowitsch {
22560be760fbSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
22570be760fbSHong Zhang   Mat_SeqSBAIJ      *b;
2258*421480d9SBarry Smith   PetscBool          perm_identity;
22590be760fbSHong Zhang   PetscReal          fill = info->fill;
22600be760fbSHong Zhang   const PetscInt    *rip, *riip;
22610be760fbSHong Zhang   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
22620be760fbSHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
22630be760fbSHong Zhang   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
22640298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
22650be760fbSHong Zhang   PetscBT            lnkbt;
22660be760fbSHong Zhang   IS                 iperm;
2267*421480d9SBarry Smith   PetscBool          diagDense;
22680be760fbSHong Zhang 
22690be760fbSHong Zhang   PetscFunctionBegin;
227008401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2271*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
2272*421480d9SBarry Smith   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
22739186b105SHong Zhang 
22740be760fbSHong Zhang   /* check whether perm is the identity mapping */
22759566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
22769566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
22779566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iperm, &riip));
22789566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
22790be760fbSHong Zhang 
22800be760fbSHong Zhang   /* initialization */
22819f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
22829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
22830be760fbSHong Zhang   ui[0] = 0;
22840be760fbSHong Zhang 
22850be760fbSHong Zhang   /* jl: linked list for storing indices of the pivot rows
22860be760fbSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
22879566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
22880be760fbSHong Zhang   for (i = 0; i < am; i++) {
22899371c9d4SSatish Balay     jl[i] = am;
22909371c9d4SSatish Balay     il[i] = 0;
22910be760fbSHong Zhang   }
22920be760fbSHong Zhang 
22930be760fbSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
22940be760fbSHong Zhang   nlnk = am + 1;
22959566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
22960be760fbSHong Zhang 
229795b5ac22SHong Zhang   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
22989566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
22990be760fbSHong Zhang   current_space = free_space;
23000be760fbSHong Zhang 
23010be760fbSHong Zhang   for (k = 0; k < am; k++) { /* for each active row k */
23020be760fbSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
23030be760fbSHong Zhang     nzk   = 0;
23040be760fbSHong Zhang     ncols = ai[rip[k] + 1] - ai[rip[k]];
230528b400f6SJacob Faibussowitsch     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
23060be760fbSHong Zhang     ncols_upper = 0;
23070be760fbSHong Zhang     for (j = 0; j < ncols; j++) {
23080be760fbSHong Zhang       i = riip[*(aj + ai[rip[k]] + j)];
23090be760fbSHong Zhang       if (i >= k) { /* only take upper triangular entry */
23100be760fbSHong Zhang         cols[ncols_upper] = i;
23110be760fbSHong Zhang         ncols_upper++;
23120be760fbSHong Zhang       }
23130be760fbSHong Zhang     }
23149566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
23150be760fbSHong Zhang     nzk += nlnk;
23160be760fbSHong Zhang 
23170be760fbSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
23180be760fbSHong Zhang     prow = jl[k]; /* 1st pivot row */
23190be760fbSHong Zhang 
23200be760fbSHong Zhang     while (prow < k) {
23210be760fbSHong Zhang       nextprow = jl[prow];
23220be760fbSHong Zhang       /* merge prow into k-th row */
23230be760fbSHong Zhang       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
23240be760fbSHong Zhang       jmax   = ui[prow + 1];
23250be760fbSHong Zhang       ncols  = jmax - jmin;
23260be760fbSHong Zhang       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
23279566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
23280be760fbSHong Zhang       nzk += nlnk;
23290be760fbSHong Zhang 
23300be760fbSHong Zhang       /* update il and jl for prow */
23310be760fbSHong Zhang       if (jmin < jmax) {
23320be760fbSHong Zhang         il[prow] = jmin;
23332205254eSKarl Rupp         j        = *uj_ptr;
23342205254eSKarl Rupp         jl[prow] = jl[j];
23352205254eSKarl Rupp         jl[j]    = prow;
23360be760fbSHong Zhang       }
23370be760fbSHong Zhang       prow = nextprow;
23380be760fbSHong Zhang     }
23390be760fbSHong Zhang 
23400be760fbSHong Zhang     /* if free space is not available, make more free space */
23410be760fbSHong Zhang     if (current_space->local_remaining < nzk) {
23420be760fbSHong Zhang       i = am - k + 1;                                    /* num of unfactored rows */
2343f91af8c7SBarry Smith       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
23449566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
23450be760fbSHong Zhang       reallocs++;
23460be760fbSHong Zhang     }
23470be760fbSHong Zhang 
23480be760fbSHong Zhang     /* copy data into free space, then initialize lnk */
23499566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
23500be760fbSHong Zhang 
23510be760fbSHong Zhang     /* add the k-th row into il and jl */
23527b056e98SHong Zhang     if (nzk > 1) {
23530be760fbSHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
23549371c9d4SSatish Balay       jl[k] = jl[i];
23559371c9d4SSatish Balay       jl[i] = k;
23560be760fbSHong Zhang       il[k] = ui[k] + 1;
23570be760fbSHong Zhang     }
23580be760fbSHong Zhang     ui_ptr[k] = current_space->array;
23592205254eSKarl Rupp 
23600be760fbSHong Zhang     current_space->array += nzk;
23610be760fbSHong Zhang     current_space->local_used += nzk;
23620be760fbSHong Zhang     current_space->local_remaining -= nzk;
23630be760fbSHong Zhang 
23640be760fbSHong Zhang     ui[k + 1] = ui[k] + nzk;
23650be760fbSHong Zhang   }
23660be760fbSHong Zhang 
23679566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
23689566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iperm, &riip));
23699566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
23700be760fbSHong Zhang 
23719263d837SHong Zhang   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
237284648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj));
23739566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
23749566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
23750be760fbSHong Zhang 
23760be760fbSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
2377f284f12aSHong Zhang   b          = (Mat_SeqSBAIJ *)fact->data;
23780be760fbSHong Zhang   b->free_ij = PETSC_TRUE;
237984648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
23809f0612e4SBarry Smith   b->free_a = PETSC_TRUE;
23810be760fbSHong Zhang   b->j      = uj;
23820be760fbSHong Zhang   b->i      = ui;
23830be760fbSHong Zhang   b->diag   = udiag;
2384f4259b30SLisandro Dalcin   b->ilen   = NULL;
2385f4259b30SLisandro Dalcin   b->imax   = NULL;
23860be760fbSHong Zhang   b->row    = perm;
23870be760fbSHong Zhang   b->col    = perm;
238826fbe8dcSKarl Rupp 
23899566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
23909566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
23912205254eSKarl Rupp 
23920be760fbSHong Zhang   b->icol          = iperm;
23930be760fbSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
239426fbe8dcSKarl Rupp 
239584648c2dSPierre Jolivet   PetscCall(PetscMalloc1(am, &b->solve_work));
23962205254eSKarl Rupp 
23970be760fbSHong Zhang   b->maxnz = b->nz = ui[am];
23980be760fbSHong Zhang 
2399f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2400f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
24010be760fbSHong Zhang   if (ai[am] != 0) {
24026a69fef8SHong Zhang     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
240395b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
24040be760fbSHong Zhang   } else {
2405f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
24060be760fbSHong Zhang   }
24079263d837SHong Zhang #if defined(PETSC_USE_INFO)
24089263d837SHong Zhang   if (ai[am] != 0) {
24099263d837SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
24109566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
24119566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
24129566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
24139263d837SHong Zhang   } else {
24149566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
24159263d837SHong Zhang   }
24169263d837SHong Zhang #endif
241735233d99SShri Abhyankar   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
24183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24190be760fbSHong Zhang }
24200be760fbSHong Zhang 
2421d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
2422d71ae5a4SJacob Faibussowitsch {
2423813a1f2bSShri Abhyankar   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
2424813a1f2bSShri Abhyankar   PetscInt           n  = A->rmap->n;
2425813a1f2bSShri Abhyankar   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
2426813a1f2bSShri Abhyankar   PetscScalar       *x, sum;
2427813a1f2bSShri Abhyankar   const PetscScalar *b;
2428b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
2429813a1f2bSShri Abhyankar   PetscInt           i, nz;
2430813a1f2bSShri Abhyankar 
2431813a1f2bSShri Abhyankar   PetscFunctionBegin;
24323ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2433813a1f2bSShri Abhyankar 
2434b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
24359566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
24369566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
2437813a1f2bSShri Abhyankar 
2438813a1f2bSShri Abhyankar   /* forward solve the lower triangular */
2439813a1f2bSShri Abhyankar   x[0] = b[0];
2440813a1f2bSShri Abhyankar   v    = aa;
2441813a1f2bSShri Abhyankar   vi   = aj;
2442813a1f2bSShri Abhyankar   for (i = 1; i < n; i++) {
2443813a1f2bSShri Abhyankar     nz  = ai[i + 1] - ai[i];
2444813a1f2bSShri Abhyankar     sum = b[i];
2445813a1f2bSShri Abhyankar     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2446813a1f2bSShri Abhyankar     v += nz;
2447813a1f2bSShri Abhyankar     vi += nz;
2448813a1f2bSShri Abhyankar     x[i] = sum;
2449813a1f2bSShri Abhyankar   }
2450813a1f2bSShri Abhyankar 
2451813a1f2bSShri Abhyankar   /* backward solve the upper triangular */
245262fbd6afSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
24533c0119dfSShri Abhyankar     v   = aa + adiag[i + 1] + 1;
24543c0119dfSShri Abhyankar     vi  = aj + adiag[i + 1] + 1;
2455813a1f2bSShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
2456813a1f2bSShri Abhyankar     sum = x[i];
2457813a1f2bSShri Abhyankar     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
24583c0119dfSShri Abhyankar     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
2459813a1f2bSShri Abhyankar   }
2460813a1f2bSShri Abhyankar 
24619566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2462b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
24639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
24649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
24653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2466813a1f2bSShri Abhyankar }
2467813a1f2bSShri Abhyankar 
2468d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
2469d71ae5a4SJacob Faibussowitsch {
2470f268cba8SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
2471f268cba8SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
2472f268cba8SShri Abhyankar   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
2473f268cba8SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c;
2474f268cba8SShri Abhyankar   PetscScalar       *x, *tmp, sum;
2475f268cba8SShri Abhyankar   const PetscScalar *b;
2476b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
2477f268cba8SShri Abhyankar 
2478f268cba8SShri Abhyankar   PetscFunctionBegin;
24793ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2480f268cba8SShri Abhyankar 
2481b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
24829566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
24839566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
2484f268cba8SShri Abhyankar   tmp = a->solve_work;
2485f268cba8SShri Abhyankar 
24869371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
24879371c9d4SSatish Balay   r = rout;
24889371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
24899371c9d4SSatish Balay   c = cout;
2490f268cba8SShri Abhyankar 
2491f268cba8SShri Abhyankar   /* forward solve the lower triangular */
2492f268cba8SShri Abhyankar   tmp[0] = b[r[0]];
2493f268cba8SShri Abhyankar   v      = aa;
2494f268cba8SShri Abhyankar   vi     = aj;
2495f268cba8SShri Abhyankar   for (i = 1; i < n; i++) {
2496f268cba8SShri Abhyankar     nz  = ai[i + 1] - ai[i];
2497f268cba8SShri Abhyankar     sum = b[r[i]];
2498f268cba8SShri Abhyankar     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2499f268cba8SShri Abhyankar     tmp[i] = sum;
25009371c9d4SSatish Balay     v += nz;
25019371c9d4SSatish Balay     vi += nz;
2502f268cba8SShri Abhyankar   }
2503f268cba8SShri Abhyankar 
2504f268cba8SShri Abhyankar   /* backward solve the upper triangular */
2505f268cba8SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
25063c0119dfSShri Abhyankar     v   = aa + adiag[i + 1] + 1;
25073c0119dfSShri Abhyankar     vi  = aj + adiag[i + 1] + 1;
2508f268cba8SShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
2509f268cba8SShri Abhyankar     sum = tmp[i];
2510f268cba8SShri Abhyankar     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2511f268cba8SShri Abhyankar     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
2512f268cba8SShri Abhyankar   }
2513f268cba8SShri Abhyankar 
25149566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
25159566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
2516b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
25179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
25189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
25199566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
25203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2521f268cba8SShri Abhyankar }
2522