xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision da81f9329be15cc55f054c8a00978087195c9247)
149b5e25fSSatish Balay 
2c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>
3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
4af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
5c6db04a5SJed Brown #include <petscis.h>
649b5e25fSSatish Balay 
7d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
8d71ae5a4SJacob Faibussowitsch {
94ff4e45dSHong Zhang   Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
104ff4e45dSHong Zhang   MatScalar    *dd   = fact->a;
114ff4e45dSHong Zhang   PetscInt      mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;
125f9f512dSHong Zhang 
135f9f512dSHong Zhang   PetscFunctionBegin;
145f80ce2aSJacob Faibussowitsch   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);
154ff4e45dSHong Zhang 
169371c9d4SSatish Balay   nneg_tmp = 0;
179371c9d4SSatish Balay   npos_tmp = 0;
18eeeff2ecSHong Zhang   for (i = 0; i < mbs; i++) {
1926fbe8dcSKarl Rupp     if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
204ff4e45dSHong Zhang     else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
21eeeff2ecSHong Zhang     fi++;
223e0d88b5SBarry Smith   }
234ff4e45dSHong Zhang   if (nneg) *nneg = nneg_tmp;
24eeeff2ecSHong Zhang   if (npos) *npos = npos_tmp;
254ff4e45dSHong Zhang   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
265f9f512dSHong Zhang   PetscFunctionReturn(0);
275f9f512dSHong Zhang }
285f9f512dSHong Zhang 
295f9f512dSHong Zhang /*
305f9f512dSHong Zhang   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
3110c27e3dSHong Zhang   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
325f9f512dSHong Zhang */
33d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
34d71ae5a4SJacob Faibussowitsch {
3510c27e3dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b;
365d0c19d7SBarry Smith   const PetscInt *rip, *ai, *aj;
375d0c19d7SBarry Smith   PetscInt        i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
3810c27e3dSHong Zhang   PetscInt        m, reallocs = 0, prow;
3910c27e3dSHong Zhang   PetscInt       *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
4010c27e3dSHong Zhang   PetscReal       f = info->fill;
41ace3abfcSBarry Smith   PetscBool       perm_identity;
4210c27e3dSHong Zhang 
4310c27e3dSHong Zhang   PetscFunctionBegin;
4410c27e3dSHong Zhang   /* check whether perm is the identity mapping */
459566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
469566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
4710c27e3dSHong Zhang 
4810c27e3dSHong Zhang   if (perm_identity) { /* without permutation */
4910c27e3dSHong Zhang     a->permute = PETSC_FALSE;
5026fbe8dcSKarl Rupp 
519371c9d4SSatish Balay     ai = a->i;
529371c9d4SSatish Balay     aj = a->j;
5310c27e3dSHong Zhang   } else { /* non-trivial permutation */
5410c27e3dSHong Zhang     a->permute = PETSC_TRUE;
5526fbe8dcSKarl Rupp 
569566063dSJacob Faibussowitsch     PetscCall(MatReorderingSeqSBAIJ(A, perm));
5726fbe8dcSKarl Rupp 
589371c9d4SSatish Balay     ai = a->inew;
599371c9d4SSatish Balay     aj = a->jnew;
6010c27e3dSHong Zhang   }
6110c27e3dSHong Zhang 
6210c27e3dSHong Zhang   /* initialization */
639566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs + 1, &iu));
649371c9d4SSatish Balay   umax = (PetscInt)(f * ai[mbs] + 1);
659371c9d4SSatish Balay   umax += mbs + 1;
669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(umax, &ju));
6710c27e3dSHong Zhang   iu[0] = mbs + 1;
6810c27e3dSHong Zhang   juidx = mbs + 1; /* index for ju */
69d8c74875SBarry Smith   /* jl linked list for pivot row -- linked list for col index */
709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
7110c27e3dSHong Zhang   for (i = 0; i < mbs; i++) {
7210c27e3dSHong Zhang     jl[i] = mbs;
7310c27e3dSHong Zhang     q[i]  = 0;
7410c27e3dSHong Zhang   }
7510c27e3dSHong Zhang 
7610c27e3dSHong Zhang   /* for each row k */
7710c27e3dSHong Zhang   for (k = 0; k < mbs; k++) {
7810c27e3dSHong Zhang     for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
7910c27e3dSHong Zhang     nzk  = 0;                           /* num. of nz blocks in k-th block row with diagonal block excluded */
8010c27e3dSHong Zhang     q[k] = mbs;
8110c27e3dSHong Zhang     /* initialize nonzero structure of k-th row to row rip[k] of A */
8210c27e3dSHong Zhang     jmin = ai[rip[k]] + 1; /* exclude diag[k] */
8310c27e3dSHong Zhang     jmax = ai[rip[k] + 1];
8410c27e3dSHong Zhang     for (j = jmin; j < jmax; j++) {
8510c27e3dSHong Zhang       vj = rip[aj[j]]; /* col. value */
8610c27e3dSHong Zhang       if (vj > k) {
8710c27e3dSHong Zhang         qm = k;
8810c27e3dSHong Zhang         do {
899371c9d4SSatish Balay           m  = qm;
909371c9d4SSatish Balay           qm = q[m];
9110c27e3dSHong Zhang         } while (qm < vj);
925f80ce2aSJacob Faibussowitsch         PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
9310c27e3dSHong Zhang         nzk++;
9410c27e3dSHong Zhang         q[m]  = vj;
9510c27e3dSHong Zhang         q[vj] = qm;
9610c27e3dSHong Zhang       } /* if (vj > k) */
9710c27e3dSHong Zhang     }   /* for (j=jmin; j<jmax; j++) */
9810c27e3dSHong Zhang 
9910c27e3dSHong Zhang     /* modify nonzero structure of k-th row by computing fill-in
10010c27e3dSHong Zhang        for each row i to be merged in */
10110c27e3dSHong Zhang     prow = k;
10210c27e3dSHong Zhang     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
10310c27e3dSHong Zhang 
10410c27e3dSHong Zhang     while (prow < k) {
10510c27e3dSHong Zhang       /* merge row prow into k-th row */
1069371c9d4SSatish Balay       jmin = iu[prow] + 1;
1079371c9d4SSatish Balay       jmax = iu[prow + 1];
10810c27e3dSHong Zhang       qm   = k;
10910c27e3dSHong Zhang       for (j = jmin; j < jmax; j++) {
11010c27e3dSHong Zhang         vj = ju[j];
11110c27e3dSHong Zhang         do {
1129371c9d4SSatish Balay           m  = qm;
1139371c9d4SSatish Balay           qm = q[m];
11410c27e3dSHong Zhang         } while (qm < vj);
11510c27e3dSHong Zhang         if (qm != vj) {
1169371c9d4SSatish Balay           nzk++;
1179371c9d4SSatish Balay           q[m]  = vj;
1189371c9d4SSatish Balay           q[vj] = qm;
1199371c9d4SSatish Balay           qm    = vj;
12010c27e3dSHong Zhang         }
12110c27e3dSHong Zhang       }
12210c27e3dSHong Zhang       prow = jl[prow]; /* next pivot row */
12310c27e3dSHong Zhang     }
12410c27e3dSHong Zhang 
12510c27e3dSHong Zhang     /* add k to row list for first nonzero element in k-th row */
12610c27e3dSHong Zhang     if (nzk > 0) {
12710c27e3dSHong Zhang       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
1289371c9d4SSatish Balay       jl[k] = jl[i];
1299371c9d4SSatish Balay       jl[i] = k;
13010c27e3dSHong Zhang     }
13110c27e3dSHong Zhang     iu[k + 1] = iu[k] + nzk;
13210c27e3dSHong Zhang 
13310c27e3dSHong Zhang     /* allocate more space to ju if needed */
13410c27e3dSHong Zhang     if (iu[k + 1] > umax) {
13510c27e3dSHong Zhang       /* estimate how much additional space we will need */
13610c27e3dSHong Zhang       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
13710c27e3dSHong Zhang       /* just double the memory each time */
13810c27e3dSHong Zhang       maxadd = umax;
13910c27e3dSHong Zhang       if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
14010c27e3dSHong Zhang       umax += maxadd;
14110c27e3dSHong Zhang 
14210c27e3dSHong Zhang       /* allocate a longer ju */
1439566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(umax, &jutmp));
1449566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
1459566063dSJacob Faibussowitsch       PetscCall(PetscFree(ju));
14610c27e3dSHong Zhang       ju = jutmp;
14710c27e3dSHong Zhang       reallocs++; /* count how many times we realloc */
14810c27e3dSHong Zhang     }
14910c27e3dSHong Zhang 
15010c27e3dSHong Zhang     /* save nonzero structure of k-th row in ju */
15110c27e3dSHong Zhang     i = k;
15210c27e3dSHong Zhang     while (nzk--) {
15310c27e3dSHong Zhang       i           = q[i];
15410c27e3dSHong Zhang       ju[juidx++] = i;
15510c27e3dSHong Zhang     }
15610c27e3dSHong Zhang   }
15710c27e3dSHong Zhang 
1586cf91177SBarry Smith #if defined(PETSC_USE_INFO)
15910c27e3dSHong Zhang   if (ai[mbs] != 0) {
16010c27e3dSHong Zhang     PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
1619566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1629566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
1639566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
1649566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
16510c27e3dSHong Zhang   } else {
1669566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
16710c27e3dSHong Zhang   }
16863ba0a88SBarry Smith #endif
16910c27e3dSHong Zhang 
1709566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
1719566063dSJacob Faibussowitsch   PetscCall(PetscFree2(jl, q));
17210c27e3dSHong Zhang 
17310c27e3dSHong Zhang   /* put together the new matrix */
1749566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));
17510c27e3dSHong Zhang 
176719d5645SBarry Smith   b               = (Mat_SeqSBAIJ *)(F)->data;
17710c27e3dSHong Zhang   b->singlemalloc = PETSC_FALSE;
178e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
179e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
18026fbe8dcSKarl Rupp 
1819566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a));
18210c27e3dSHong Zhang   b->j    = ju;
18310c27e3dSHong Zhang   b->i    = iu;
184f4259b30SLisandro Dalcin   b->diag = NULL;
185f4259b30SLisandro Dalcin   b->ilen = NULL;
186f4259b30SLisandro Dalcin   b->imax = NULL;
18710c27e3dSHong Zhang   b->row  = perm;
18826fbe8dcSKarl Rupp 
18910c27e3dSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
19026fbe8dcSKarl Rupp 
1919566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
19226fbe8dcSKarl Rupp 
19310c27e3dSHong Zhang   b->icol = perm;
1949566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
1959566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
19610c27e3dSHong Zhang   /* In b structure:  Free imax, ilen, old a, old j.
19710c27e3dSHong Zhang      Allocate idnew, solve_work, new a, new j */
19810c27e3dSHong Zhang   b->maxnz = b->nz = iu[mbs];
19910c27e3dSHong Zhang 
200719d5645SBarry Smith   (F)->info.factor_mallocs   = reallocs;
201719d5645SBarry Smith   (F)->info.fill_ratio_given = f;
20210c27e3dSHong Zhang   if (ai[mbs] != 0) {
203719d5645SBarry Smith     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
20410c27e3dSHong Zhang   } else {
205719d5645SBarry Smith     (F)->info.fill_ratio_needed = 0.0;
20610c27e3dSHong Zhang   }
2079566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
20810c27e3dSHong Zhang   PetscFunctionReturn(0);
20910c27e3dSHong Zhang }
21010c27e3dSHong Zhang /*
21110c27e3dSHong Zhang     Symbolic U^T*D*U factorization for SBAIJ format.
21280d3d5a7SHong Zhang     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
21310c27e3dSHong Zhang */
214c6db04a5SJed Brown #include <petscbt.h>
215c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
216d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
217d71ae5a4SJacob Faibussowitsch {
21898a8e78dSHong Zhang   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
21998a8e78dSHong Zhang   Mat_SeqSBAIJ      *b;
220ace3abfcSBarry Smith   PetscBool          perm_identity, missing;
22198a8e78dSHong Zhang   PetscReal          fill = info->fill;
2227b056e98SHong Zhang   const PetscInt    *rip, *ai = a->i, *aj = a->j;
2239186b105SHong Zhang   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
22498a8e78dSHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
225c6d0d4f0SHong Zhang   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2260298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
22798a8e78dSHong Zhang   PetscBT            lnkbt;
228d595f711SHong Zhang 
229d595f711SHong Zhang   PetscFunctionBegin;
2305f80ce2aSJacob Faibussowitsch   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2319566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
2325f80ce2aSJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2336d07c2b1SHong Zhang   if (bs > 1) {
2349566063dSJacob Faibussowitsch     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
2356d07c2b1SHong Zhang     PetscFunctionReturn(0);
2366d07c2b1SHong Zhang   }
237d595f711SHong Zhang 
238d595f711SHong Zhang   /* check whether perm is the identity mapping */
2399566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
2405f80ce2aSJacob Faibussowitsch   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
241d595f711SHong Zhang   a->permute = PETSC_FALSE;
2429566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
243d595f711SHong Zhang 
244d595f711SHong Zhang   /* initialization */
2459566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs + 1, &ui));
2469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs + 1, &udiag));
247d595f711SHong Zhang   ui[0] = 0;
248d595f711SHong Zhang 
249d595f711SHong Zhang   /* jl: linked list for storing indices of the pivot rows
250d595f711SHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
2519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
252d595f711SHong Zhang   for (i = 0; i < mbs; i++) {
2539371c9d4SSatish Balay     jl[i] = mbs;
2549371c9d4SSatish Balay     il[i] = 0;
255d595f711SHong Zhang   }
256d595f711SHong Zhang 
257d595f711SHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
258d595f711SHong Zhang   nlnk = mbs + 1;
2599566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
260d595f711SHong Zhang 
261d595f711SHong Zhang   /* initial FreeSpace size is fill*(ai[mbs]+1) */
2629566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
263d595f711SHong Zhang   current_space = free_space;
264d595f711SHong Zhang 
265d595f711SHong Zhang   for (k = 0; k < mbs; k++) { /* for each active row k */
266d595f711SHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
267d595f711SHong Zhang     nzk   = 0;
268c6d0d4f0SHong Zhang     ncols = ai[k + 1] - ai[k];
2695f80ce2aSJacob Faibussowitsch     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
270d595f711SHong Zhang     for (j = 0; j < ncols; j++) {
271c6d0d4f0SHong Zhang       i       = *(aj + ai[k] + j);
272c6d0d4f0SHong Zhang       cols[j] = i;
273d595f711SHong Zhang     }
2749566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
275d595f711SHong Zhang     nzk += nlnk;
276d595f711SHong Zhang 
277d595f711SHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
278d595f711SHong Zhang     prow = jl[k]; /* 1st pivot row */
279d595f711SHong Zhang 
280d595f711SHong Zhang     while (prow < k) {
281d595f711SHong Zhang       nextprow = jl[prow];
282d595f711SHong Zhang       /* merge prow into k-th row */
283d595f711SHong Zhang       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
284d595f711SHong Zhang       jmax   = ui[prow + 1];
285d595f711SHong Zhang       ncols  = jmax - jmin;
286d595f711SHong Zhang       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
2879566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
288d595f711SHong Zhang       nzk += nlnk;
289d595f711SHong Zhang 
290d595f711SHong Zhang       /* update il and jl for prow */
291d595f711SHong Zhang       if (jmin < jmax) {
292d595f711SHong Zhang         il[prow] = jmin;
2939371c9d4SSatish Balay         j        = *uj_ptr;
2949371c9d4SSatish Balay         jl[prow] = jl[j];
2959371c9d4SSatish Balay         jl[j]    = prow;
296d595f711SHong Zhang       }
297d595f711SHong Zhang       prow = nextprow;
298d595f711SHong Zhang     }
299d595f711SHong Zhang 
300d595f711SHong Zhang     /* if free space is not available, make more free space */
301d595f711SHong Zhang     if (current_space->local_remaining < nzk) {
302d595f711SHong Zhang       i = mbs - k + 1;                                   /* num of unfactored rows */
303f91af8c7SBarry Smith       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3049566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
305d595f711SHong Zhang       reallocs++;
306d595f711SHong Zhang     }
307d595f711SHong Zhang 
308d595f711SHong Zhang     /* copy data into free space, then initialize lnk */
3099566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
310d595f711SHong Zhang 
311d595f711SHong Zhang     /* add the k-th row into il and jl */
3127b056e98SHong Zhang     if (nzk > 1) {
313d595f711SHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
3149371c9d4SSatish Balay       jl[k] = jl[i];
3159371c9d4SSatish Balay       jl[i] = k;
316d595f711SHong Zhang       il[k] = ui[k] + 1;
317d595f711SHong Zhang     }
318d595f711SHong Zhang     ui_ptr[k] = current_space->array;
31926fbe8dcSKarl Rupp 
320d595f711SHong Zhang     current_space->array += nzk;
321d595f711SHong Zhang     current_space->local_used += nzk;
322d595f711SHong Zhang     current_space->local_remaining -= nzk;
323d595f711SHong Zhang 
324d595f711SHong Zhang     ui[k + 1] = ui[k] + nzk;
325d595f711SHong Zhang   }
326d595f711SHong Zhang 
3279566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
3289566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, il, jl, cols));
329d595f711SHong Zhang 
330d595f711SHong Zhang   /* destroy list of free space and other temporary array(s) */
3319566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
3329566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
3339566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
334d595f711SHong Zhang 
335d595f711SHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
3369566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
337d595f711SHong Zhang 
3387b056e98SHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
339d595f711SHong Zhang   b->singlemalloc = PETSC_FALSE;
340d595f711SHong Zhang   b->free_a       = PETSC_TRUE;
341d595f711SHong Zhang   b->free_ij      = PETSC_TRUE;
34226fbe8dcSKarl Rupp 
3439566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
34426fbe8dcSKarl Rupp 
345d595f711SHong Zhang   b->j         = uj;
346d595f711SHong Zhang   b->i         = ui;
347c6d0d4f0SHong Zhang   b->diag      = udiag;
348c6d0d4f0SHong Zhang   b->free_diag = PETSC_TRUE;
349f4259b30SLisandro Dalcin   b->ilen      = NULL;
350f4259b30SLisandro Dalcin   b->imax      = NULL;
351d595f711SHong Zhang   b->row       = perm;
352d595f711SHong Zhang   b->icol      = perm;
35326fbe8dcSKarl Rupp 
3549566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
3559566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
35626fbe8dcSKarl Rupp 
3577b056e98SHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
35826fbe8dcSKarl Rupp 
3599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
36026fbe8dcSKarl Rupp 
361d595f711SHong Zhang   b->maxnz = b->nz = ui[mbs];
362d595f711SHong Zhang 
36395b5ac22SHong Zhang   fact->info.factor_mallocs   = reallocs;
36495b5ac22SHong Zhang   fact->info.fill_ratio_given = fill;
365d595f711SHong Zhang   if (ai[mbs] != 0) {
36695b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
367d595f711SHong Zhang   } else {
36895b5ac22SHong Zhang     fact->info.fill_ratio_needed = 0.0;
369d595f711SHong Zhang   }
37095b5ac22SHong Zhang #if defined(PETSC_USE_INFO)
37195b5ac22SHong Zhang   if (ai[mbs] != 0) {
37295b5ac22SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
3739566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3749566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3759566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
37695b5ac22SHong Zhang   } else {
3779566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
37895b5ac22SHong Zhang   }
37995b5ac22SHong Zhang #endif
380c6d0d4f0SHong Zhang   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
381d595f711SHong Zhang   PetscFunctionReturn(0);
382d595f711SHong Zhang }
383d595f711SHong Zhang 
384d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
385d71ae5a4SJacob Faibussowitsch {
386d595f711SHong Zhang   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
387d595f711SHong Zhang   Mat_SeqSBAIJ      *b;
388ace3abfcSBarry Smith   PetscBool          perm_identity, missing;
389d595f711SHong Zhang   PetscReal          fill = info->fill;
390d595f711SHong Zhang   const PetscInt    *rip, *ai, *aj;
391d595f711SHong Zhang   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
392d595f711SHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
393d595f711SHong Zhang   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
3940298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
395d595f711SHong Zhang   PetscBT            lnkbt;
39649b5e25fSSatish Balay 
39749b5e25fSSatish Balay   PetscFunctionBegin;
3989566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &d));
3995f80ce2aSJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
40058ebbce7SBarry Smith 
40110c27e3dSHong Zhang   /*
40210c27e3dSHong Zhang    This code originally uses Modified Sparse Row (MSR) storage
403*da81f932SPierre Jolivet    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
40410c27e3dSHong Zhang    Then it is rewritten so the factor B takes seqsbaij format. However the associated
40510c27e3dSHong Zhang    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
40610c27e3dSHong Zhang    thus the original code in MSR format is still used for these cases.
40710c27e3dSHong Zhang    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
40810c27e3dSHong Zhang    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
40910c27e3dSHong Zhang   */
410fff829cfSHong Zhang   if (bs > 1) {
4119566063dSJacob Faibussowitsch     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
412fff829cfSHong Zhang     PetscFunctionReturn(0);
413fff829cfSHong Zhang   }
41410c27e3dSHong Zhang 
41598a8e78dSHong Zhang   /* check whether perm is the identity mapping */
4169566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
4175f80ce2aSJacob Faibussowitsch   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
418fff829cfSHong Zhang   a->permute = PETSC_FALSE;
4199371c9d4SSatish Balay   ai         = a->i;
4209371c9d4SSatish Balay   aj         = a->j;
4219566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
422fff829cfSHong Zhang 
423fff829cfSHong Zhang   /* initialization */
4249566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs + 1, &ui));
42598a8e78dSHong Zhang   ui[0] = 0;
42698a8e78dSHong Zhang 
42798a8e78dSHong Zhang   /* jl: linked list for storing indices of the pivot rows
4287625dc9aSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
4299566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
4307625dc9aSHong Zhang   for (i = 0; i < mbs; i++) {
4319371c9d4SSatish Balay     jl[i] = mbs;
4329371c9d4SSatish Balay     il[i] = 0;
433fff829cfSHong Zhang   }
434fff829cfSHong Zhang 
43598a8e78dSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
4367625dc9aSHong Zhang   nlnk = mbs + 1;
4379566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
438fff829cfSHong Zhang 
4397625dc9aSHong Zhang   /* initial FreeSpace size is fill*(ai[mbs]+1) */
4409566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
44198a8e78dSHong Zhang   current_space = free_space;
44298a8e78dSHong Zhang 
4437625dc9aSHong Zhang   for (k = 0; k < mbs; k++) { /* for each active row k */
44498a8e78dSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
44598a8e78dSHong Zhang     nzk   = 0;
44698a8e78dSHong Zhang     ncols = ai[rip[k] + 1] - ai[rip[k]];
4477625dc9aSHong Zhang     for (j = 0; j < ncols; j++) {
4487625dc9aSHong Zhang       i       = *(aj + ai[rip[k]] + j);
4497625dc9aSHong Zhang       cols[j] = rip[i];
4507625dc9aSHong Zhang     }
4519566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
45298a8e78dSHong Zhang     nzk += nlnk;
45398a8e78dSHong Zhang 
45498a8e78dSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
45598a8e78dSHong Zhang     prow = jl[k]; /* 1st pivot row */
456fff829cfSHong Zhang 
457fff829cfSHong Zhang     while (prow < k) {
458fff829cfSHong Zhang       nextprow = jl[prow];
45998a8e78dSHong Zhang       /* merge prow into k-th row */
4607625dc9aSHong Zhang       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
46198a8e78dSHong Zhang       jmax   = ui[prow + 1];
46298a8e78dSHong Zhang       ncols  = jmax - jmin;
4637625dc9aSHong Zhang       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
4649566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
46598a8e78dSHong Zhang       nzk += nlnk;
466fff829cfSHong Zhang 
46798a8e78dSHong Zhang       /* update il and jl for prow */
468fff829cfSHong Zhang       if (jmin < jmax) {
469fff829cfSHong Zhang         il[prow] = jmin;
47026fbe8dcSKarl Rupp 
4719371c9d4SSatish Balay         j        = *uj_ptr;
4729371c9d4SSatish Balay         jl[prow] = jl[j];
4739371c9d4SSatish Balay         jl[j]    = prow;
474fff829cfSHong Zhang       }
475fff829cfSHong Zhang       prow = nextprow;
476fff829cfSHong Zhang     }
477fff829cfSHong Zhang 
47898a8e78dSHong Zhang     /* if free space is not available, make more free space */
47998a8e78dSHong Zhang     if (current_space->local_remaining < nzk) {
4807625dc9aSHong Zhang       i = mbs - k + 1;                                                            /* num of unfactored rows */
481f91af8c7SBarry Smith       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
4829566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
48398a8e78dSHong Zhang       reallocs++;
48498a8e78dSHong Zhang     }
48598a8e78dSHong Zhang 
48698a8e78dSHong Zhang     /* copy data into free space, then initialize lnk */
4879566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
48898a8e78dSHong Zhang 
48998a8e78dSHong Zhang     /* add the k-th row into il and jl */
49098a8e78dSHong Zhang     if (nzk - 1 > 0) {
4917625dc9aSHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
4929371c9d4SSatish Balay       jl[k] = jl[i];
4939371c9d4SSatish Balay       jl[i] = k;
49498a8e78dSHong Zhang       il[k] = ui[k] + 1;
495fff829cfSHong Zhang     }
4967625dc9aSHong Zhang     ui_ptr[k] = current_space->array;
49726fbe8dcSKarl Rupp 
49898a8e78dSHong Zhang     current_space->array += nzk;
49998a8e78dSHong Zhang     current_space->local_used += nzk;
50098a8e78dSHong Zhang     current_space->local_remaining -= nzk;
501fff829cfSHong Zhang 
50298a8e78dSHong Zhang     ui[k + 1] = ui[k] + nzk;
503fff829cfSHong Zhang   }
504fff829cfSHong Zhang 
5059566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
5069566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, il, jl, cols));
507fff829cfSHong Zhang 
50898a8e78dSHong Zhang   /* destroy list of free space and other temporary array(s) */
5099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
5109566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
5119566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
512fff829cfSHong Zhang 
51398a8e78dSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
5149566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
51598a8e78dSHong Zhang 
51695b5ac22SHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
517fff829cfSHong Zhang   b->singlemalloc = PETSC_FALSE;
518e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
519e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
52026fbe8dcSKarl Rupp 
5219566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
52226fbe8dcSKarl Rupp 
52398a8e78dSHong Zhang   b->j    = uj;
52498a8e78dSHong Zhang   b->i    = ui;
525f4259b30SLisandro Dalcin   b->diag = NULL;
526f4259b30SLisandro Dalcin   b->ilen = NULL;
527f4259b30SLisandro Dalcin   b->imax = NULL;
528fff829cfSHong Zhang   b->row  = perm;
52926fbe8dcSKarl Rupp 
530fff829cfSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
53126fbe8dcSKarl Rupp 
5329566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
533fff829cfSHong Zhang   b->icol = perm;
5349566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
5359566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
5367625dc9aSHong Zhang   b->maxnz = b->nz = ui[mbs];
537fff829cfSHong Zhang 
53895b5ac22SHong Zhang   fact->info.factor_mallocs   = reallocs;
53995b5ac22SHong Zhang   fact->info.fill_ratio_given = fill;
5407625dc9aSHong Zhang   if (ai[mbs] != 0) {
54195b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
542fff829cfSHong Zhang   } else {
54395b5ac22SHong Zhang     fact->info.fill_ratio_needed = 0.0;
544fff829cfSHong Zhang   }
54595b5ac22SHong Zhang #if defined(PETSC_USE_INFO)
54695b5ac22SHong Zhang   if (ai[mbs] != 0) {
54795b5ac22SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
5489566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
5499566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
5509566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
55195b5ac22SHong Zhang   } else {
5529566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
55395b5ac22SHong Zhang   }
55495b5ac22SHong Zhang #endif
5559566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
556064503c5SHong Zhang   PetscFunctionReturn(0);
557064503c5SHong Zhang }
558d595f711SHong Zhang 
559d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
560d71ae5a4SJacob Faibussowitsch {
5614c16a6a6SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
5624c16a6a6SHong Zhang   IS              perm = b->row;
5635d0c19d7SBarry Smith   const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
5645d0c19d7SBarry Smith   PetscInt        i, j;
5655d0c19d7SBarry Smith   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
5663bc0b13bSBarry Smith   PetscInt        bs = A->rmap->bs, bs2 = a->bs2;
5674c16a6a6SHong Zhang   MatScalar      *ba = b->a, *aa, *ap, *dk, *uik;
5684c16a6a6SHong Zhang   MatScalar      *u, *diag, *rtmp, *rtmp_ptr;
56928de702eSHong Zhang   MatScalar      *work;
57013f74950SBarry Smith   PetscInt       *pivots;
5715f8bbccaSHong Zhang   PetscBool       allowzeropivot, zeropivotdetected;
5724c16a6a6SHong Zhang 
5734c16a6a6SHong Zhang   PetscFunctionBegin;
5744c16a6a6SHong Zhang   /* initialization */
5759566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
5769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
5775f8bbccaSHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
5785f8bbccaSHong Zhang 
5796df5ee2eSHong Zhang   il[0] = 0;
5806df5ee2eSHong Zhang   for (i = 0; i < mbs; i++) jl[i] = mbs;
5816df5ee2eSHong Zhang 
5829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
5839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bs, &pivots));
5844c16a6a6SHong Zhang 
5859566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &perm_ptr));
5864c16a6a6SHong Zhang 
5874c16a6a6SHong Zhang   /* check permutation */
5884c16a6a6SHong Zhang   if (!a->permute) {
5899371c9d4SSatish Balay     ai = a->i;
5909371c9d4SSatish Balay     aj = a->j;
5919371c9d4SSatish Balay     aa = a->a;
5924c16a6a6SHong Zhang   } else {
5939371c9d4SSatish Balay     ai = a->inew;
5949371c9d4SSatish Balay     aj = a->jnew;
5959566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
5969566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
5979566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
5989566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
5994c16a6a6SHong Zhang 
6004c16a6a6SHong Zhang     for (i = 0; i < mbs; i++) {
6019371c9d4SSatish Balay       jmin = ai[i];
6029371c9d4SSatish Balay       jmax = ai[i + 1];
6034c16a6a6SHong Zhang       for (j = jmin; j < jmax; j++) {
6044c16a6a6SHong Zhang         while (a2anew[j] != j) {
6059371c9d4SSatish Balay           k         = a2anew[j];
6069371c9d4SSatish Balay           a2anew[j] = a2anew[k];
6079371c9d4SSatish Balay           a2anew[k] = k;
6084c16a6a6SHong Zhang           for (k1 = 0; k1 < bs2; k1++) {
6094c16a6a6SHong Zhang             dk[k1]           = aa[k * bs2 + k1];
6104c16a6a6SHong Zhang             aa[k * bs2 + k1] = aa[j * bs2 + k1];
6114c16a6a6SHong Zhang             aa[j * bs2 + k1] = dk[k1];
6124c16a6a6SHong Zhang           }
6134c16a6a6SHong Zhang         }
6144c16a6a6SHong Zhang         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
6154c16a6a6SHong Zhang         if (i > aj[j]) {
6164c16a6a6SHong Zhang           ap = aa + j * bs2;                       /* ptr to the beginning of j-th block of aa */
6174c16a6a6SHong Zhang           for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
6184c16a6a6SHong Zhang           for (k = 0; k < bs; k++) {               /* j-th block of aa <- dk^T */
6194c16a6a6SHong Zhang             for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
6204c16a6a6SHong Zhang           }
6214c16a6a6SHong Zhang         }
6224c16a6a6SHong Zhang       }
6234c16a6a6SHong Zhang     }
6249566063dSJacob Faibussowitsch     PetscCall(PetscFree(a2anew));
6254c16a6a6SHong Zhang   }
6264c16a6a6SHong Zhang 
6274c16a6a6SHong Zhang   /* for each row k */
6284c16a6a6SHong Zhang   for (k = 0; k < mbs; k++) {
6294c16a6a6SHong Zhang     /*initialize k-th row with elements nonzero in row perm(k) of A */
6309371c9d4SSatish Balay     jmin = ai[perm_ptr[k]];
6319371c9d4SSatish Balay     jmax = ai[perm_ptr[k] + 1];
632057f5ba7SHong Zhang 
6334c16a6a6SHong Zhang     ap = aa + jmin * bs2;
6344c16a6a6SHong Zhang     for (j = jmin; j < jmax; j++) {
6354c16a6a6SHong Zhang       vj       = perm_ptr[aj[j]]; /* block col. index */
6364c16a6a6SHong Zhang       rtmp_ptr = rtmp + vj * bs2;
6374c16a6a6SHong Zhang       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
6384c16a6a6SHong Zhang     }
6394c16a6a6SHong Zhang 
6404c16a6a6SHong Zhang     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
6419566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
6424c16a6a6SHong Zhang     i = jl[k]; /* first row to be added to k_th row  */
6434c16a6a6SHong Zhang 
644057f5ba7SHong Zhang     while (i < k) {
6454c16a6a6SHong Zhang       nexti = jl[i]; /* next row to be added to k_th row */
6464c16a6a6SHong Zhang 
6474c16a6a6SHong Zhang       /* compute multiplier */
6484c16a6a6SHong Zhang       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
6494c16a6a6SHong Zhang 
6504c16a6a6SHong Zhang       /* uik = -inv(Di)*U_bar(i,k) */
6514c16a6a6SHong Zhang       diag = ba + i * bs2;
6524c16a6a6SHong Zhang       u    = ba + ili * bs2;
6539566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(uik, bs2));
65496b95a6bSBarry Smith       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
6554c16a6a6SHong Zhang 
6564c16a6a6SHong Zhang       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
65796b95a6bSBarry Smith       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
6589566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(4.0 * bs * bs2));
6594c16a6a6SHong Zhang 
6604c16a6a6SHong Zhang       /* update -U(i,k) */
6619566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
6624c16a6a6SHong Zhang 
6634c16a6a6SHong Zhang       /* add multiple of row i to k-th row ... */
6649371c9d4SSatish Balay       jmin = ili + 1;
6659371c9d4SSatish Balay       jmax = bi[i + 1];
6664c16a6a6SHong Zhang       if (jmin < jmax) {
6674c16a6a6SHong Zhang         for (j = jmin; j < jmax; j++) {
6684c16a6a6SHong Zhang           /* rtmp += -U(i,k)^T * U_bar(i,j) */
6694c16a6a6SHong Zhang           rtmp_ptr = rtmp + bj[j] * bs2;
6704c16a6a6SHong Zhang           u        = ba + j * bs2;
67196b95a6bSBarry Smith           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
6724c16a6a6SHong Zhang         }
6739566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
6744c16a6a6SHong Zhang 
6754c16a6a6SHong Zhang         /* ... add i to row list for next nonzero entry */
6764c16a6a6SHong Zhang         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
6774c16a6a6SHong Zhang         j     = bj[jmin];
6789371c9d4SSatish Balay         jl[i] = jl[j];
6799371c9d4SSatish Balay         jl[j] = i; /* update jl */
6804c16a6a6SHong Zhang       }
6814c16a6a6SHong Zhang       i = nexti;
6824c16a6a6SHong Zhang     }
6834c16a6a6SHong Zhang 
6844c16a6a6SHong Zhang     /* save nonzero entries in k-th row of U ... */
6854c16a6a6SHong Zhang 
6864c16a6a6SHong Zhang     /* invert diagonal block */
6874c16a6a6SHong Zhang     diag = ba + k * bs2;
6889566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(diag, dk, bs2));
6895f8bbccaSHong Zhang 
6909566063dSJacob Faibussowitsch     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
6917b6c816cSBarry Smith     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
6924c16a6a6SHong Zhang 
6939371c9d4SSatish Balay     jmin = bi[k];
6949371c9d4SSatish Balay     jmax = bi[k + 1];
6954c16a6a6SHong Zhang     if (jmin < jmax) {
6964c16a6a6SHong Zhang       for (j = jmin; j < jmax; j++) {
6974c16a6a6SHong Zhang         vj       = bj[j]; /* block col. index of U */
6984c16a6a6SHong Zhang         u        = ba + j * bs2;
6994c16a6a6SHong Zhang         rtmp_ptr = rtmp + vj * bs2;
7004c16a6a6SHong Zhang         for (k1 = 0; k1 < bs2; k1++) {
7014c16a6a6SHong Zhang           *u++        = *rtmp_ptr;
7024c16a6a6SHong Zhang           *rtmp_ptr++ = 0.0;
7034c16a6a6SHong Zhang         }
7044c16a6a6SHong Zhang       }
7054c16a6a6SHong Zhang 
7064c16a6a6SHong Zhang       /* ... add k to row list for first nonzero entry in k-th row */
7074c16a6a6SHong Zhang       il[k] = jmin;
7084c16a6a6SHong Zhang       i     = bj[jmin];
7099371c9d4SSatish Balay       jl[k] = jl[i];
7109371c9d4SSatish Balay       jl[i] = k;
7114c16a6a6SHong Zhang     }
7124c16a6a6SHong Zhang   }
7134c16a6a6SHong Zhang 
7149566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
7159566063dSJacob Faibussowitsch   PetscCall(PetscFree2(il, jl));
7169566063dSJacob Faibussowitsch   PetscCall(PetscFree3(dk, uik, work));
7179566063dSJacob Faibussowitsch   PetscCall(PetscFree(pivots));
7181baa6e33SBarry Smith   if (a->permute) PetscCall(PetscFree(aa));
7194c16a6a6SHong Zhang 
7209566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &perm_ptr));
72126fbe8dcSKarl Rupp 
7224f79d315SHong Zhang   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
7234f79d315SHong Zhang   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
7244f79d315SHong Zhang   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
7254f79d315SHong Zhang   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;
726db4efbfdSBarry Smith 
7274c16a6a6SHong Zhang   C->assembled    = PETSC_TRUE;
7284c16a6a6SHong Zhang   C->preallocated = PETSC_TRUE;
72926fbe8dcSKarl Rupp 
7309566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
7314c16a6a6SHong Zhang   PetscFunctionReturn(0);
7324c16a6a6SHong Zhang }
733d4edadd4SHong Zhang 
734d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
735d71ae5a4SJacob Faibussowitsch {
736671cb588SHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
73713f74950SBarry Smith   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
73813f74950SBarry Smith   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
7393bc0b13bSBarry Smith   PetscInt      bs = A->rmap->bs, bs2 = a->bs2;
740671cb588SHong Zhang   MatScalar    *ba = b->a, *aa, *ap, *dk, *uik;
741671cb588SHong Zhang   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
74228de702eSHong Zhang   MatScalar    *work;
74313f74950SBarry Smith   PetscInt     *pivots;
7445f8bbccaSHong Zhang   PetscBool     allowzeropivot, zeropivotdetected;
745671cb588SHong Zhang 
746671cb588SHong Zhang   PetscFunctionBegin;
7479566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
7489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
7496df5ee2eSHong Zhang   il[0] = 0;
7506df5ee2eSHong Zhang   for (i = 0; i < mbs; i++) jl[i] = mbs;
7516df5ee2eSHong Zhang 
7529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
7539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bs, &pivots));
7545f8bbccaSHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
755671cb588SHong Zhang 
7569371c9d4SSatish Balay   ai = a->i;
7579371c9d4SSatish Balay   aj = a->j;
7589371c9d4SSatish Balay   aa = a->a;
759671cb588SHong Zhang 
760671cb588SHong Zhang   /* for each row k */
761671cb588SHong Zhang   for (k = 0; k < mbs; k++) {
762671cb588SHong Zhang     /*initialize k-th row with elements nonzero in row k of A */
7639371c9d4SSatish Balay     jmin = ai[k];
7649371c9d4SSatish Balay     jmax = ai[k + 1];
765671cb588SHong Zhang     ap   = aa + jmin * bs2;
766671cb588SHong Zhang     for (j = jmin; j < jmax; j++) {
767671cb588SHong Zhang       vj       = aj[j]; /* block col. index */
768671cb588SHong Zhang       rtmp_ptr = rtmp + vj * bs2;
769671cb588SHong Zhang       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
770671cb588SHong Zhang     }
771671cb588SHong Zhang 
772671cb588SHong Zhang     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
7739566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
774671cb588SHong Zhang     i = jl[k]; /* first row to be added to k_th row  */
775671cb588SHong Zhang 
776057f5ba7SHong Zhang     while (i < k) {
777671cb588SHong Zhang       nexti = jl[i]; /* next row to be added to k_th row */
778671cb588SHong Zhang 
779671cb588SHong Zhang       /* compute multiplier */
780671cb588SHong Zhang       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
781671cb588SHong Zhang 
782671cb588SHong Zhang       /* uik = -inv(Di)*U_bar(i,k) */
783671cb588SHong Zhang       diag = ba + i * bs2;
784671cb588SHong Zhang       u    = ba + ili * bs2;
7859566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(uik, bs2));
78696b95a6bSBarry Smith       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
787671cb588SHong Zhang 
788671cb588SHong Zhang       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
78996b95a6bSBarry Smith       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
7909566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs * bs2));
791671cb588SHong Zhang 
792671cb588SHong Zhang       /* update -U(i,k) */
7939566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
794671cb588SHong Zhang 
795671cb588SHong Zhang       /* add multiple of row i to k-th row ... */
7969371c9d4SSatish Balay       jmin = ili + 1;
7979371c9d4SSatish Balay       jmax = bi[i + 1];
798671cb588SHong Zhang       if (jmin < jmax) {
799671cb588SHong Zhang         for (j = jmin; j < jmax; j++) {
800671cb588SHong Zhang           /* rtmp += -U(i,k)^T * U_bar(i,j) */
801671cb588SHong Zhang           rtmp_ptr = rtmp + bj[j] * bs2;
802671cb588SHong Zhang           u        = ba + j * bs2;
80396b95a6bSBarry Smith           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
804671cb588SHong Zhang         }
8059566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
806671cb588SHong Zhang 
807671cb588SHong Zhang         /* ... add i to row list for next nonzero entry */
808671cb588SHong Zhang         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
809671cb588SHong Zhang         j     = bj[jmin];
8109371c9d4SSatish Balay         jl[i] = jl[j];
8119371c9d4SSatish Balay         jl[j] = i; /* update jl */
812671cb588SHong Zhang       }
813671cb588SHong Zhang       i = nexti;
814671cb588SHong Zhang     }
815671cb588SHong Zhang 
816671cb588SHong Zhang     /* save nonzero entries in k-th row of U ... */
817671cb588SHong Zhang 
818671cb588SHong Zhang     /* invert diagonal block */
819671cb588SHong Zhang     diag = ba + k * bs2;
8209566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(diag, dk, bs2));
8215f8bbccaSHong Zhang 
8229566063dSJacob Faibussowitsch     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
8237b6c816cSBarry Smith     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
824671cb588SHong Zhang 
8259371c9d4SSatish Balay     jmin = bi[k];
8269371c9d4SSatish Balay     jmax = bi[k + 1];
827671cb588SHong Zhang     if (jmin < jmax) {
828671cb588SHong Zhang       for (j = jmin; j < jmax; j++) {
829671cb588SHong Zhang         vj       = bj[j]; /* block col. index of U */
830671cb588SHong Zhang         u        = ba + j * bs2;
831671cb588SHong Zhang         rtmp_ptr = rtmp + vj * bs2;
832671cb588SHong Zhang         for (k1 = 0; k1 < bs2; k1++) {
833671cb588SHong Zhang           *u++        = *rtmp_ptr;
834671cb588SHong Zhang           *rtmp_ptr++ = 0.0;
835671cb588SHong Zhang         }
836671cb588SHong Zhang       }
837671cb588SHong Zhang 
838671cb588SHong Zhang       /* ... add k to row list for first nonzero entry in k-th row */
839671cb588SHong Zhang       il[k] = jmin;
840671cb588SHong Zhang       i     = bj[jmin];
8419371c9d4SSatish Balay       jl[k] = jl[i];
8429371c9d4SSatish Balay       jl[i] = k;
843671cb588SHong Zhang     }
844671cb588SHong Zhang   }
845671cb588SHong Zhang 
8469566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
8479566063dSJacob Faibussowitsch   PetscCall(PetscFree2(il, jl));
8489566063dSJacob Faibussowitsch   PetscCall(PetscFree3(dk, uik, work));
8499566063dSJacob Faibussowitsch   PetscCall(PetscFree(pivots));
850671cb588SHong Zhang 
8514f79d315SHong Zhang   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
8524f79d315SHong Zhang   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
8534f79d315SHong Zhang   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
8544f79d315SHong Zhang   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
855671cb588SHong Zhang   C->assembled           = PETSC_TRUE;
856671cb588SHong Zhang   C->preallocated        = PETSC_TRUE;
85726fbe8dcSKarl Rupp 
8589566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
859671cb588SHong Zhang   PetscFunctionReturn(0);
860671cb588SHong Zhang }
861671cb588SHong Zhang 
86249b5e25fSSatish Balay /*
863fcf159c0SHong Zhang     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
864cc0c071aSHong Zhang     Version for blocks 2 by 2.
86549b5e25fSSatish Balay */
866d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
867d71ae5a4SJacob Faibussowitsch {
86891602c66SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
869cc0c071aSHong Zhang   IS              perm = b->row;
8705d0c19d7SBarry Smith   const PetscInt *ai, *aj, *perm_ptr;
8715d0c19d7SBarry Smith   PetscInt        i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
8725d0c19d7SBarry Smith   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
873d8c74875SBarry Smith   MatScalar      *ba = b->a, *aa, *ap;
874d8c74875SBarry Smith   MatScalar      *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
87514d2772eSHong Zhang   PetscReal       shift = info->shiftamount;
876a455e926SHong Zhang   PetscBool       allowzeropivot, zeropivotdetected;
87749b5e25fSSatish Balay 
87849b5e25fSSatish Balay   PetscFunctionBegin;
8790164db54SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
8800164db54SHong Zhang 
88191602c66SHong Zhang   /* initialization */
88291602c66SHong Zhang   /* il and jl record the first nonzero element in each row of the accessing
88391602c66SHong Zhang      window U(0:k, k:mbs-1).
88491602c66SHong Zhang      jl:    list of rows to be added to uneliminated rows
88591602c66SHong Zhang             i>= k: jl(i) is the first row to be added to row i
88691602c66SHong Zhang             i<  k: jl(i) is the row following row i in some list of rows
88791602c66SHong Zhang             jl(i) = mbs indicates the end of a list
88891602c66SHong Zhang      il(i): points to the first nonzero element in columns k,...,mbs-1 of
88991602c66SHong Zhang             row i of U */
8909566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
8919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
8926df5ee2eSHong Zhang   il[0] = 0;
8936df5ee2eSHong Zhang   for (i = 0; i < mbs; i++) jl[i] = mbs;
8946df5ee2eSHong Zhang 
8959566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &perm_ptr));
896cc0c071aSHong Zhang 
897cc0c071aSHong Zhang   /* check permutation */
898cc0c071aSHong Zhang   if (!a->permute) {
8999371c9d4SSatish Balay     ai = a->i;
9009371c9d4SSatish Balay     aj = a->j;
9019371c9d4SSatish Balay     aa = a->a;
902cc0c071aSHong Zhang   } else {
9039371c9d4SSatish Balay     ai = a->inew;
9049371c9d4SSatish Balay     aj = a->jnew;
9059566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
9069566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
9079566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
9089566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
909cc0c071aSHong Zhang 
910cc0c071aSHong Zhang     for (i = 0; i < mbs; i++) {
9119371c9d4SSatish Balay       jmin = ai[i];
9129371c9d4SSatish Balay       jmax = ai[i + 1];
913cc0c071aSHong Zhang       for (j = jmin; j < jmax; j++) {
914cc0c071aSHong Zhang         while (a2anew[j] != j) {
9159371c9d4SSatish Balay           k         = a2anew[j];
9169371c9d4SSatish Balay           a2anew[j] = a2anew[k];
9179371c9d4SSatish Balay           a2anew[k] = k;
918cc0c071aSHong Zhang           for (k1 = 0; k1 < 4; k1++) {
919cc0c071aSHong Zhang             dk[k1]         = aa[k * 4 + k1];
920cc0c071aSHong Zhang             aa[k * 4 + k1] = aa[j * 4 + k1];
921cc0c071aSHong Zhang             aa[j * 4 + k1] = dk[k1];
922cc0c071aSHong Zhang           }
923cc0c071aSHong Zhang         }
924cc0c071aSHong Zhang         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
925cc0c071aSHong Zhang         if (i > aj[j]) {
926cc0c071aSHong Zhang           ap    = aa + j * 4; /* ptr to the beginning of the block */
927cc0c071aSHong Zhang           dk[1] = ap[1];      /* swap ap[1] and ap[2] */
928cc0c071aSHong Zhang           ap[1] = ap[2];
929cc0c071aSHong Zhang           ap[2] = dk[1];
930cc0c071aSHong Zhang         }
931cc0c071aSHong Zhang       }
932cc0c071aSHong Zhang     }
9339566063dSJacob Faibussowitsch     PetscCall(PetscFree(a2anew));
934cc0c071aSHong Zhang   }
9353845f261SHong Zhang 
93691602c66SHong Zhang   /* for each row k */
93791602c66SHong Zhang   for (k = 0; k < mbs; k++) {
93891602c66SHong Zhang     /*initialize k-th row with elements nonzero in row perm(k) of A */
9399371c9d4SSatish Balay     jmin = ai[perm_ptr[k]];
9409371c9d4SSatish Balay     jmax = ai[perm_ptr[k] + 1];
941cc0c071aSHong Zhang     ap   = aa + jmin * 4;
94291602c66SHong Zhang     for (j = jmin; j < jmax; j++) {
943cc0c071aSHong Zhang       vj       = perm_ptr[aj[j]]; /* block col. index */
944cc0c071aSHong Zhang       rtmp_ptr = rtmp + vj * 4;
945cc0c071aSHong Zhang       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
94691602c66SHong Zhang     }
94791602c66SHong Zhang 
94891602c66SHong Zhang     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
9499566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
95091602c66SHong Zhang     i = jl[k]; /* first row to be added to k_th row  */
95191602c66SHong Zhang 
952057f5ba7SHong Zhang     while (i < k) {
95391602c66SHong Zhang       nexti = jl[i]; /* next row to be added to k_th row */
95491602c66SHong Zhang 
9553845f261SHong Zhang       /* compute multiplier */
95691602c66SHong Zhang       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
9573845f261SHong Zhang 
9583845f261SHong Zhang       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
959cc0c071aSHong Zhang       diag   = ba + i * 4;
960cc0c071aSHong Zhang       u      = ba + ili * 4;
961cc0c071aSHong Zhang       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
962cc0c071aSHong Zhang       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
963cc0c071aSHong Zhang       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
964cc0c071aSHong Zhang       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
9653845f261SHong Zhang 
9663845f261SHong Zhang       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
967cc0c071aSHong Zhang       dk[0] += uik[0] * u[0] + uik[1] * u[1];
968cc0c071aSHong Zhang       dk[1] += uik[2] * u[0] + uik[3] * u[1];
969cc0c071aSHong Zhang       dk[2] += uik[0] * u[2] + uik[1] * u[3];
970cc0c071aSHong Zhang       dk[3] += uik[2] * u[2] + uik[3] * u[3];
9713845f261SHong Zhang 
9729566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(16.0 * 2.0));
973187a9f4bSHong Zhang 
9743845f261SHong Zhang       /* update -U(i,k): ba[ili] = uik */
9759566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
97691602c66SHong Zhang 
97791602c66SHong Zhang       /* add multiple of row i to k-th row ... */
9789371c9d4SSatish Balay       jmin = ili + 1;
9799371c9d4SSatish Balay       jmax = bi[i + 1];
98091602c66SHong Zhang       if (jmin < jmax) {
9813845f261SHong Zhang         for (j = jmin; j < jmax; j++) {
9823845f261SHong Zhang           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
983cc0c071aSHong Zhang           rtmp_ptr = rtmp + bj[j] * 4;
984cc0c071aSHong Zhang           u        = ba + j * 4;
985cc0c071aSHong Zhang           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
986cc0c071aSHong Zhang           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
987cc0c071aSHong Zhang           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
988cc0c071aSHong Zhang           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
9893845f261SHong Zhang         }
9909566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
9913845f261SHong Zhang 
99291602c66SHong Zhang         /* ... add i to row list for next nonzero entry */
99391602c66SHong Zhang         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
99491602c66SHong Zhang         j     = bj[jmin];
9959371c9d4SSatish Balay         jl[i] = jl[j];
9969371c9d4SSatish Balay         jl[j] = i; /* update jl */
99791602c66SHong Zhang       }
998a1723e09SHong Zhang       i = nexti;
99991602c66SHong Zhang     }
1000cc0c071aSHong Zhang 
100191602c66SHong Zhang     /* save nonzero entries in k-th row of U ... */
10023845f261SHong Zhang 
1003cc0c071aSHong Zhang     /* invert diagonal block */
1004cc0c071aSHong Zhang     diag = ba + k * 4;
10059566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(diag, dk, 4));
10069566063dSJacob Faibussowitsch     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
10077b6c816cSBarry Smith     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
10083845f261SHong Zhang 
10099371c9d4SSatish Balay     jmin = bi[k];
10109371c9d4SSatish Balay     jmax = bi[k + 1];
101191602c66SHong Zhang     if (jmin < jmax) {
101291602c66SHong Zhang       for (j = jmin; j < jmax; j++) {
1013cc0c071aSHong Zhang         vj       = bj[j]; /* block col. index of U */
1014cc0c071aSHong Zhang         u        = ba + j * 4;
1015cc0c071aSHong Zhang         rtmp_ptr = rtmp + vj * 4;
1016cc0c071aSHong Zhang         for (k1 = 0; k1 < 4; k1++) {
1017cc0c071aSHong Zhang           *u++        = *rtmp_ptr;
1018cc0c071aSHong Zhang           *rtmp_ptr++ = 0.0;
1019cc0c071aSHong Zhang         }
1020cc0c071aSHong Zhang       }
10213845f261SHong Zhang 
102291602c66SHong Zhang       /* ... add k to row list for first nonzero entry in k-th row */
102391602c66SHong Zhang       il[k] = jmin;
102491602c66SHong Zhang       i     = bj[jmin];
10259371c9d4SSatish Balay       jl[k] = jl[i];
10269371c9d4SSatish Balay       jl[i] = k;
102791602c66SHong Zhang     }
102891602c66SHong Zhang   }
10293845f261SHong Zhang 
10309566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
10319566063dSJacob Faibussowitsch   PetscCall(PetscFree2(il, jl));
10321baa6e33SBarry Smith   if (a->permute) PetscCall(PetscFree(aa));
10339566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &perm_ptr));
103426fbe8dcSKarl Rupp 
10354f79d315SHong Zhang   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
10364f79d315SHong Zhang   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
103749b5e25fSSatish Balay   C->assembled           = PETSC_TRUE;
10385c0bcdfcSHong Zhang   C->preallocated        = PETSC_TRUE;
103926fbe8dcSKarl Rupp 
10409566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
104149b5e25fSSatish Balay   PetscFunctionReturn(0);
104249b5e25fSSatish Balay }
104391602c66SHong Zhang 
104449b5e25fSSatish Balay /*
104549b5e25fSSatish Balay       Version for when blocks are 2 by 2 Using natural ordering
104649b5e25fSSatish Balay */
1047d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1048d71ae5a4SJacob Faibussowitsch {
1049ab27746eSHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
105013f74950SBarry Smith   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
105113f74950SBarry Smith   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1052d8c74875SBarry Smith   MatScalar    *ba = b->a, *aa, *ap, dk[8], uik[8];
1053ab27746eSHong Zhang   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
105414d2772eSHong Zhang   PetscReal     shift = info->shiftamount;
1055a455e926SHong Zhang   PetscBool     allowzeropivot, zeropivotdetected;
105649b5e25fSSatish Balay 
105749b5e25fSSatish Balay   PetscFunctionBegin;
10580164db54SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
10590164db54SHong Zhang 
1060ab27746eSHong Zhang   /* initialization */
1061ab27746eSHong Zhang   /* il and jl record the first nonzero element in each row of the accessing
1062ab27746eSHong Zhang      window U(0:k, k:mbs-1).
1063ab27746eSHong Zhang      jl:    list of rows to be added to uneliminated rows
1064ab27746eSHong Zhang             i>= k: jl(i) is the first row to be added to row i
1065ab27746eSHong Zhang             i<  k: jl(i) is the row following row i in some list of rows
1066ab27746eSHong Zhang             jl(i) = mbs indicates the end of a list
1067ab27746eSHong Zhang      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1068ab27746eSHong Zhang             row i of U */
10699566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
10709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
10716df5ee2eSHong Zhang   il[0] = 0;
10726df5ee2eSHong Zhang   for (i = 0; i < mbs; i++) jl[i] = mbs;
10736df5ee2eSHong Zhang 
10749371c9d4SSatish Balay   ai = a->i;
10759371c9d4SSatish Balay   aj = a->j;
10769371c9d4SSatish Balay   aa = a->a;
1077ab27746eSHong Zhang 
1078ab27746eSHong Zhang   /* for each row k */
1079ab27746eSHong Zhang   for (k = 0; k < mbs; k++) {
1080ab27746eSHong Zhang     /*initialize k-th row with elements nonzero in row k of A */
10819371c9d4SSatish Balay     jmin = ai[k];
10829371c9d4SSatish Balay     jmax = ai[k + 1];
1083ab27746eSHong Zhang     ap   = aa + jmin * 4;
1084ab27746eSHong Zhang     for (j = jmin; j < jmax; j++) {
1085ab27746eSHong Zhang       vj       = aj[j]; /* block col. index */
1086ab27746eSHong Zhang       rtmp_ptr = rtmp + vj * 4;
1087ab27746eSHong Zhang       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
108849b5e25fSSatish Balay     }
1089ab27746eSHong Zhang 
1090ab27746eSHong Zhang     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
10919566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1092ab27746eSHong Zhang     i = jl[k]; /* first row to be added to k_th row  */
1093ab27746eSHong Zhang 
1094057f5ba7SHong Zhang     while (i < k) {
1095ab27746eSHong Zhang       nexti = jl[i]; /* next row to be added to k_th row */
1096ab27746eSHong Zhang 
1097ab27746eSHong Zhang       /* compute multiplier */
1098ab27746eSHong Zhang       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1099ab27746eSHong Zhang 
1100ab27746eSHong Zhang       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1101ab27746eSHong Zhang       diag   = ba + i * 4;
1102ab27746eSHong Zhang       u      = ba + ili * 4;
1103ab27746eSHong Zhang       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1104ab27746eSHong Zhang       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1105ab27746eSHong Zhang       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1106ab27746eSHong Zhang       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
1107ab27746eSHong Zhang 
1108ab27746eSHong Zhang       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1109ab27746eSHong Zhang       dk[0] += uik[0] * u[0] + uik[1] * u[1];
1110ab27746eSHong Zhang       dk[1] += uik[2] * u[0] + uik[3] * u[1];
1111ab27746eSHong Zhang       dk[2] += uik[0] * u[2] + uik[1] * u[3];
1112ab27746eSHong Zhang       dk[3] += uik[2] * u[2] + uik[3] * u[3];
1113ab27746eSHong Zhang 
11149566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(16.0 * 2.0));
1115187a9f4bSHong Zhang 
1116ab27746eSHong Zhang       /* update -U(i,k): ba[ili] = uik */
11179566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
1118ab27746eSHong Zhang 
1119ab27746eSHong Zhang       /* add multiple of row i to k-th row ... */
11209371c9d4SSatish Balay       jmin = ili + 1;
11219371c9d4SSatish Balay       jmax = bi[i + 1];
1122ab27746eSHong Zhang       if (jmin < jmax) {
1123ab27746eSHong Zhang         for (j = jmin; j < jmax; j++) {
1124ab27746eSHong Zhang           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1125ab27746eSHong Zhang           rtmp_ptr = rtmp + bj[j] * 4;
1126ab27746eSHong Zhang           u        = ba + j * 4;
1127ab27746eSHong Zhang           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1128ab27746eSHong Zhang           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1129ab27746eSHong Zhang           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1130ab27746eSHong Zhang           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
113149b5e25fSSatish Balay         }
11329566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
1133ab27746eSHong Zhang 
1134ab27746eSHong Zhang         /* ... add i to row list for next nonzero entry */
1135ab27746eSHong Zhang         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1136ab27746eSHong Zhang         j     = bj[jmin];
11379371c9d4SSatish Balay         jl[i] = jl[j];
11389371c9d4SSatish Balay         jl[j] = i; /* update jl */
113949b5e25fSSatish Balay       }
1140ab27746eSHong Zhang       i = nexti;
114149b5e25fSSatish Balay     }
1142ab27746eSHong Zhang 
1143ab27746eSHong Zhang     /* save nonzero entries in k-th row of U ... */
1144ab27746eSHong Zhang 
114549b5e25fSSatish Balay     /* invert diagonal block */
1146ab27746eSHong Zhang     diag = ba + k * 4;
11479566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(diag, dk, 4));
11489566063dSJacob Faibussowitsch     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
11497b6c816cSBarry Smith     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1150ab27746eSHong Zhang 
11519371c9d4SSatish Balay     jmin = bi[k];
11529371c9d4SSatish Balay     jmax = bi[k + 1];
1153ab27746eSHong Zhang     if (jmin < jmax) {
1154ab27746eSHong Zhang       for (j = jmin; j < jmax; j++) {
1155ab27746eSHong Zhang         vj       = bj[j]; /* block col. index of U */
1156ab27746eSHong Zhang         u        = ba + j * 4;
1157ab27746eSHong Zhang         rtmp_ptr = rtmp + vj * 4;
1158ab27746eSHong Zhang         for (k1 = 0; k1 < 4; k1++) {
1159ab27746eSHong Zhang           *u++        = *rtmp_ptr;
1160ab27746eSHong Zhang           *rtmp_ptr++ = 0.0;
1161ab27746eSHong Zhang         }
1162ab27746eSHong Zhang       }
1163ab27746eSHong Zhang 
1164ab27746eSHong Zhang       /* ... add k to row list for first nonzero entry in k-th row */
1165ab27746eSHong Zhang       il[k] = jmin;
1166ab27746eSHong Zhang       i     = bj[jmin];
11679371c9d4SSatish Balay       jl[k] = jl[i];
11689371c9d4SSatish Balay       jl[i] = k;
1169ab27746eSHong Zhang     }
117049b5e25fSSatish Balay   }
117149b5e25fSSatish Balay 
11729566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
11739566063dSJacob Faibussowitsch   PetscCall(PetscFree2(il, jl));
1174ab27746eSHong Zhang 
11754f79d315SHong Zhang   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
11764f79d315SHong Zhang   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
11774f79d315SHong Zhang   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
11784f79d315SHong Zhang   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
117949b5e25fSSatish Balay   C->assembled           = PETSC_TRUE;
11805c0bcdfcSHong Zhang   C->preallocated        = PETSC_TRUE;
118126fbe8dcSKarl Rupp 
11829566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
118349b5e25fSSatish Balay   PetscFunctionReturn(0);
118449b5e25fSSatish Balay }
118549b5e25fSSatish Balay 
118649b5e25fSSatish Balay /*
118798a8e78dSHong Zhang     Numeric U^T*D*U factorization for SBAIJ format.
118891602c66SHong Zhang     Version for blocks are 1 by 1.
118949b5e25fSSatish Balay */
1190d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1191d71ae5a4SJacob Faibussowitsch {
119249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
119349b5e25fSSatish Balay   IS              ip = b->row;
11945d0c19d7SBarry Smith   const PetscInt *ai, *aj, *rip;
11955d0c19d7SBarry Smith   PetscInt       *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1196997a0949SHong Zhang   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1197997a0949SHong Zhang   MatScalar      *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
11980e95ead3SHong Zhang   PetscReal       rs;
11990e95ead3SHong Zhang   FactorShiftCtx  sctx;
120049b5e25fSSatish Balay 
120149b5e25fSSatish Balay   PetscFunctionBegin;
12020e95ead3SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
12039566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
12043cea4cbeSHong Zhang 
12059566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
1206cb718733SHong Zhang   if (!a->permute) {
12079371c9d4SSatish Balay     ai = a->i;
12089371c9d4SSatish Balay     aj = a->j;
12099371c9d4SSatish Balay     aa = a->a;
12102d007305SHong Zhang   } else {
12119371c9d4SSatish Balay     ai = a->inew;
12129371c9d4SSatish Balay     aj = a->jnew;
1213fff829cfSHong Zhang     nz = ai[mbs];
12149566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &aa));
1215fff829cfSHong Zhang     a2anew = a->a2anew;
1216997a0949SHong Zhang     bval   = a->a;
1217ad540459SPierre Jolivet     for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
12182d007305SHong Zhang   }
12192d007305SHong Zhang 
122091602c66SHong Zhang   /* initialization */
122149b5e25fSSatish Balay   /* il and jl record the first nonzero element in each row of the accessing
122249b5e25fSSatish Balay      window U(0:k, k:mbs-1).
122349b5e25fSSatish Balay      jl:    list of rows to be added to uneliminated rows
122449b5e25fSSatish Balay             i>= k: jl(i) is the first row to be added to row i
122549b5e25fSSatish Balay             i<  k: jl(i) is the row following row i in some list of rows
122649b5e25fSSatish Balay             jl(i) = mbs indicates the end of a list
122749b5e25fSSatish Balay      il(i): points to the first nonzero element in columns k,...,mbs-1 of
122849b5e25fSSatish Balay             row i of U */
12299566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1230997a0949SHong Zhang 
1231997a0949SHong Zhang   do {
123207b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
12336df5ee2eSHong Zhang     il[0]         = 0;
123449b5e25fSSatish Balay     for (i = 0; i < mbs; i++) {
12359371c9d4SSatish Balay       rtmp[i] = 0.0;
12369371c9d4SSatish Balay       jl[i]   = mbs;
123749b5e25fSSatish Balay     }
1238997a0949SHong Zhang 
123949b5e25fSSatish Balay     for (k = 0; k < mbs; k++) {
1240997a0949SHong Zhang       /*initialize k-th row by the perm[k]-th row of A */
12419371c9d4SSatish Balay       jmin = ai[rip[k]];
12429371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
12437701de02SHong Zhang       bval = ba + bi[k];
124449b5e25fSSatish Balay       for (j = jmin; j < jmax; j++) {
1245997a0949SHong Zhang         col       = rip[aj[j]];
1246997a0949SHong Zhang         rtmp[col] = aa[j];
12477701de02SHong Zhang         *bval++   = 0.0; /* for in-place factorization */
124849b5e25fSSatish Balay       }
12493cea4cbeSHong Zhang 
12503cea4cbeSHong Zhang       /* shift the diagonal of the matrix */
12513cea4cbeSHong Zhang       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
125249b5e25fSSatish Balay 
125391602c66SHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
125449b5e25fSSatish Balay       dk = rtmp[k];
125549b5e25fSSatish Balay       i  = jl[k]; /* first row to be added to k_th row  */
125649b5e25fSSatish Balay 
1257057f5ba7SHong Zhang       while (i < k) {
125849b5e25fSSatish Balay         nexti = jl[i]; /* next row to be added to k_th row */
125949b5e25fSSatish Balay 
1260fff829cfSHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
126149b5e25fSSatish Balay         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1262997a0949SHong Zhang         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
126349b5e25fSSatish Balay         dk += uikdi * ba[ili];
1264658e7b3eSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
126549b5e25fSSatish Balay 
1266997a0949SHong Zhang         /* add multiple of row i to k-th row */
12679371c9d4SSatish Balay         jmin = ili + 1;
12689371c9d4SSatish Balay         jmax = bi[i + 1];
126949b5e25fSSatish Balay         if (jmin < jmax) {
127049b5e25fSSatish Balay           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
12719566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));
1272187a9f4bSHong Zhang 
1273fff829cfSHong Zhang           /* update il and jl for row i */
1274fff829cfSHong Zhang           il[i] = jmin;
12759371c9d4SSatish Balay           j     = bj[jmin];
12769371c9d4SSatish Balay           jl[i] = jl[j];
12779371c9d4SSatish Balay           jl[j] = i;
127849b5e25fSSatish Balay         }
1279ab27746eSHong Zhang         i = nexti;
128049b5e25fSSatish Balay       }
128149b5e25fSSatish Balay 
12823cea4cbeSHong Zhang       /* shift the diagonals when zero pivot is detected */
12833cea4cbeSHong Zhang       /* compute rs=sum of abs(off-diagonal) */
12843cea4cbeSHong Zhang       rs   = 0.0;
1285997a0949SHong Zhang       jmin = bi[k] + 1;
1286997a0949SHong Zhang       nz   = bi[k + 1] - jmin;
1287997a0949SHong Zhang       if (nz) {
1288997a0949SHong Zhang         bcol = bj + jmin;
1289997a0949SHong Zhang         while (nz--) {
129089f845c8SHong Zhang           rs += PetscAbsScalar(rtmp[*bcol]);
129189f845c8SHong Zhang           bcol++;
1292997a0949SHong Zhang         }
1293997a0949SHong Zhang       }
12943cea4cbeSHong Zhang 
12953cea4cbeSHong Zhang       sctx.rs = rs;
12963cea4cbeSHong Zhang       sctx.pv = dk;
12979566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
129807b50cabSHong Zhang       if (sctx.newshift) break; /* sctx.shift_amount is updated */
12990e95ead3SHong Zhang       dk = sctx.pv;
130049b5e25fSSatish Balay 
1301997a0949SHong Zhang       /* copy data into U(k,:) */
130298a8e78dSHong Zhang       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
13039371c9d4SSatish Balay       jmin      = bi[k] + 1;
13049371c9d4SSatish Balay       jmax      = bi[k + 1];
130549b5e25fSSatish Balay       if (jmin < jmax) {
130649b5e25fSSatish Balay         for (j = jmin; j < jmax; j++) {
13079371c9d4SSatish Balay           col       = bj[j];
13089371c9d4SSatish Balay           ba[j]     = rtmp[col];
13099371c9d4SSatish Balay           rtmp[col] = 0.0;
131049b5e25fSSatish Balay         }
131198a8e78dSHong Zhang         /* add the k-th row into il and jl */
131249b5e25fSSatish Balay         il[k] = jmin;
13139371c9d4SSatish Balay         i     = bj[jmin];
13149371c9d4SSatish Balay         jl[k] = jl[i];
13159371c9d4SSatish Balay         jl[i] = k;
131649b5e25fSSatish Balay       }
131749b5e25fSSatish Balay     }
131807b50cabSHong Zhang   } while (sctx.newshift);
13199566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, jl));
13209566063dSJacob Faibussowitsch   if (a->permute) PetscCall(PetscFree(aa));
132149b5e25fSSatish Balay 
13229566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
132326fbe8dcSKarl Rupp 
13240a3c351aSHong Zhang   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
13254f79d315SHong Zhang   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
13260a3c351aSHong Zhang   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
13270a3c351aSHong Zhang   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
13280a3c351aSHong Zhang   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
132949b5e25fSSatish Balay   C->assembled           = PETSC_TRUE;
13305c0bcdfcSHong Zhang   C->preallocated        = PETSC_TRUE;
133126fbe8dcSKarl Rupp 
13329566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->N));
13333cea4cbeSHong Zhang   if (sctx.nshift) {
1334f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
13359566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1336f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
13379566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1338997a0949SHong Zhang     }
1339997a0949SHong Zhang   }
134049b5e25fSSatish Balay   PetscFunctionReturn(0);
134149b5e25fSSatish Balay }
134249b5e25fSSatish Balay 
1343671cb588SHong Zhang /*
134480d3d5a7SHong Zhang   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
134580d3d5a7SHong Zhang   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1346671cb588SHong Zhang */
1347d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1348d71ae5a4SJacob Faibussowitsch {
1349d595f711SHong Zhang   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data;
13507b056e98SHong Zhang   Mat_SeqSBAIJ  *b = (Mat_SeqSBAIJ *)B->data;
1351d595f711SHong Zhang   PetscInt       i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1352545dd064SHong Zhang   PetscInt      *ai = a->i, *aj = a->j, *ajtmp;
1353d595f711SHong Zhang   PetscInt       k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1354d595f711SHong Zhang   MatScalar     *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1355d90ac83dSHong Zhang   FactorShiftCtx sctx;
1356d595f711SHong Zhang   PetscReal      rs;
1357d595f711SHong Zhang   MatScalar      d, *v;
1358d595f711SHong Zhang 
1359d595f711SHong Zhang   PetscFunctionBegin;
13609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1361545dd064SHong Zhang 
1362d595f711SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
13639566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1364d595f711SHong Zhang 
1365f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1366d595f711SHong Zhang     sctx.shift_top = info->zeropivot;
136726fbe8dcSKarl Rupp 
13689566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(rtmp, mbs));
136926fbe8dcSKarl Rupp 
1370d595f711SHong Zhang     for (i = 0; i < mbs; i++) {
1371d595f711SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1372d595f711SHong Zhang       d = (aa)[a->diag[i]];
1373545dd064SHong Zhang       rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1374545dd064SHong Zhang       ajtmp = aj + ai[i] + 1;       /* exclude diagonal */
1375545dd064SHong Zhang       v     = aa + ai[i] + 1;
1376545dd064SHong Zhang       nz    = ai[i + 1] - ai[i] - 1;
1377545dd064SHong Zhang       for (j = 0; j < nz; j++) {
1378545dd064SHong Zhang         rtmp[i] += PetscAbsScalar(v[j]);
1379545dd064SHong Zhang         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1380545dd064SHong Zhang       }
13810838c725SBarry Smith       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1382d595f711SHong Zhang     }
1383d595f711SHong Zhang     sctx.shift_top *= 1.1;
1384d595f711SHong Zhang     sctx.nshift_max = 5;
1385d595f711SHong Zhang     sctx.shift_lo   = 0.;
1386d595f711SHong Zhang     sctx.shift_hi   = 1.;
1387d595f711SHong Zhang   }
1388d595f711SHong Zhang 
1389d595f711SHong Zhang   /* allocate working arrays
1390d595f711SHong Zhang      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1391d595f711SHong Zhang      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1392d595f711SHong Zhang   */
1393d595f711SHong Zhang   do {
139407b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
1395d595f711SHong Zhang 
1396d595f711SHong Zhang     for (i = 0; i < mbs; i++) c2r[i] = mbs;
13979e95ef84SSatish Balay     if (mbs) il[0] = 0;
1398d595f711SHong Zhang 
1399d595f711SHong Zhang     for (k = 0; k < mbs; k++) {
1400d595f711SHong Zhang       /* zero rtmp */
1401d595f711SHong Zhang       nz    = bi[k + 1] - bi[k];
1402d595f711SHong Zhang       bjtmp = bj + bi[k];
14037b056e98SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1404d595f711SHong Zhang 
1405d595f711SHong Zhang       /* load in initial unfactored row */
1406d595f711SHong Zhang       bval = ba + bi[k];
14079371c9d4SSatish Balay       jmin = ai[k];
14089371c9d4SSatish Balay       jmax = ai[k + 1];
1409d595f711SHong Zhang       for (j = jmin; j < jmax; j++) {
1410d595f711SHong Zhang         col       = aj[j];
1411d595f711SHong Zhang         rtmp[col] = aa[j];
1412d595f711SHong Zhang         *bval++   = 0.0; /* for in-place factorization */
1413d595f711SHong Zhang       }
1414d595f711SHong Zhang       /* shift the diagonal of the matrix: ZeropivotApply() */
1415d595f711SHong Zhang       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1416d595f711SHong Zhang 
1417d595f711SHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1418d595f711SHong Zhang       dk = rtmp[k];
1419d595f711SHong Zhang       i  = c2r[k]; /* first row to be added to k_th row  */
1420d595f711SHong Zhang 
1421d595f711SHong Zhang       while (i < k) {
1422d595f711SHong Zhang         nexti = c2r[i]; /* next row to be added to k_th row */
1423d595f711SHong Zhang 
1424d595f711SHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
1425d595f711SHong Zhang         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1426d595f711SHong Zhang         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1427d595f711SHong Zhang         dk += uikdi * ba[ili];           /* update diag[k] */
1428d595f711SHong Zhang         ba[ili] = uikdi;                 /* -U(i,k) */
1429d595f711SHong Zhang 
1430d595f711SHong Zhang         /* add multiple of row i to k-th row */
14319371c9d4SSatish Balay         jmin = ili + 1;
14329371c9d4SSatish Balay         jmax = bi[i + 1];
1433d595f711SHong Zhang         if (jmin < jmax) {
1434d595f711SHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1435d595f711SHong Zhang           /* update il and c2r for row i */
1436d595f711SHong Zhang           il[i]  = jmin;
14379371c9d4SSatish Balay           j      = bj[jmin];
14389371c9d4SSatish Balay           c2r[i] = c2r[j];
14399371c9d4SSatish Balay           c2r[j] = i;
1440d595f711SHong Zhang         }
1441d595f711SHong Zhang         i = nexti;
1442d595f711SHong Zhang       }
1443d595f711SHong Zhang 
1444d595f711SHong Zhang       /* copy data into U(k,:) */
1445d595f711SHong Zhang       rs   = 0.0;
14469371c9d4SSatish Balay       jmin = bi[k];
14479371c9d4SSatish Balay       jmax = bi[k + 1] - 1;
1448d595f711SHong Zhang       if (jmin < jmax) {
1449d595f711SHong Zhang         for (j = jmin; j < jmax; j++) {
14509371c9d4SSatish Balay           col   = bj[j];
14519371c9d4SSatish Balay           ba[j] = rtmp[col];
14529371c9d4SSatish Balay           rs += PetscAbsScalar(ba[j]);
1453d595f711SHong Zhang         }
1454d595f711SHong Zhang         /* add the k-th row into il and c2r */
1455d595f711SHong Zhang         il[k]  = jmin;
14569371c9d4SSatish Balay         i      = bj[jmin];
14579371c9d4SSatish Balay         c2r[k] = c2r[i];
14589371c9d4SSatish Balay         c2r[i] = k;
1459d595f711SHong Zhang       }
1460d595f711SHong Zhang 
1461d595f711SHong Zhang       sctx.rs = rs;
1462d595f711SHong Zhang       sctx.pv = dk;
14639566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
146407b50cabSHong Zhang       if (sctx.newshift) break;
1465d595f711SHong Zhang       dk = sctx.pv;
1466d595f711SHong Zhang 
1467d595f711SHong Zhang       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1468d595f711SHong Zhang     }
146907b50cabSHong Zhang   } while (sctx.newshift);
1470d595f711SHong Zhang 
14719566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, c2r));
1472d595f711SHong Zhang 
1473d595f711SHong Zhang   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
14749dcdb97aSHong Zhang   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1475d595f711SHong Zhang   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1476910cf402Sprj-   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
147780d3d5a7SHong Zhang   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
147880d3d5a7SHong Zhang   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1479d595f711SHong Zhang 
14807b056e98SHong Zhang   B->assembled    = PETSC_TRUE;
14817b056e98SHong Zhang   B->preallocated = PETSC_TRUE;
148226fbe8dcSKarl Rupp 
14839566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->rmap->n));
1484d595f711SHong Zhang 
1485d595f711SHong Zhang   /* MatPivotView() */
1486d595f711SHong Zhang   if (sctx.nshift) {
1487f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
14889566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1489f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
14909566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1491f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
14929566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1493d595f711SHong Zhang     }
1494d595f711SHong Zhang   }
1495d595f711SHong Zhang   PetscFunctionReturn(0);
1496d595f711SHong Zhang }
1497d595f711SHong Zhang 
1498d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1499d71ae5a4SJacob Faibussowitsch {
1500671cb588SHong Zhang   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
150113f74950SBarry Smith   PetscInt       i, j, mbs = a->mbs;
150213f74950SBarry Smith   PetscInt      *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
15033cea4cbeSHong Zhang   PetscInt       k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1504653a6975SHong Zhang   MatScalar     *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
150514d2772eSHong Zhang   PetscReal      rs;
15060e95ead3SHong Zhang   FactorShiftCtx sctx;
1507653a6975SHong Zhang 
1508653a6975SHong Zhang   PetscFunctionBegin;
15090e95ead3SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
15109566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
15110e95ead3SHong Zhang 
1512653a6975SHong Zhang   /* initialization */
1513653a6975SHong Zhang   /* il and jl record the first nonzero element in each row of the accessing
1514653a6975SHong Zhang      window U(0:k, k:mbs-1).
1515653a6975SHong Zhang      jl:    list of rows to be added to uneliminated rows
1516653a6975SHong Zhang             i>= k: jl(i) is the first row to be added to row i
1517653a6975SHong Zhang             i<  k: jl(i) is the row following row i in some list of rows
1518653a6975SHong Zhang             jl(i) = mbs indicates the end of a list
1519653a6975SHong Zhang      il(i): points to the first nonzero element in U(i,k:mbs-1)
1520653a6975SHong Zhang   */
15219566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs, &rtmp));
15229566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1523b00f7748SHong Zhang 
1524b00f7748SHong Zhang   do {
152507b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
15266df5ee2eSHong Zhang     il[0]         = 0;
1527653a6975SHong Zhang     for (i = 0; i < mbs; i++) {
15289371c9d4SSatish Balay       rtmp[i] = 0.0;
15299371c9d4SSatish Balay       jl[i]   = mbs;
1530653a6975SHong Zhang     }
1531653a6975SHong Zhang 
1532997a0949SHong Zhang     for (k = 0; k < mbs; k++) {
1533653a6975SHong Zhang       /*initialize k-th row with elements nonzero in row perm(k) of A */
1534653a6975SHong Zhang       nz   = ai[k + 1] - ai[k];
1535653a6975SHong Zhang       acol = aj + ai[k];
1536653a6975SHong Zhang       aval = aa + ai[k];
1537653a6975SHong Zhang       bval = ba + bi[k];
1538653a6975SHong Zhang       while (nz--) {
1539653a6975SHong Zhang         rtmp[*acol++] = *aval++;
1540653a6975SHong Zhang         *bval++       = 0.0; /* for in-place factorization */
1541653a6975SHong Zhang       }
15423cea4cbeSHong Zhang 
15433cea4cbeSHong Zhang       /* shift the diagonal of the matrix */
15443cea4cbeSHong Zhang       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1545653a6975SHong Zhang 
1546653a6975SHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1547653a6975SHong Zhang       dk = rtmp[k];
1548653a6975SHong Zhang       i  = jl[k]; /* first row to be added to k_th row  */
1549653a6975SHong Zhang 
1550653a6975SHong Zhang       while (i < k) {
1551653a6975SHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
1552653a6975SHong Zhang         /* compute multiplier, update D(k) and U(i,k) */
1553653a6975SHong Zhang         ili   = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1554653a6975SHong Zhang         uikdi = -ba[ili] * ba[bi[i]];
1555653a6975SHong Zhang         dk += uikdi * ba[ili];
1556653a6975SHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
1557653a6975SHong Zhang 
1558653a6975SHong Zhang         /* add multiple of row i to k-th row ... */
1559653a6975SHong Zhang         jmin = ili + 1;
1560653a6975SHong Zhang         nz   = bi[i + 1] - jmin;
1561653a6975SHong Zhang         if (nz > 0) {
1562653a6975SHong Zhang           bcol = bj + jmin;
1563653a6975SHong Zhang           bval = ba + jmin;
15649566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(2.0 * nz));
1565653a6975SHong Zhang           while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
1566187a9f4bSHong Zhang 
1567997a0949SHong Zhang           /* update il and jl for i-th row */
1568997a0949SHong Zhang           il[i] = jmin;
15699371c9d4SSatish Balay           j     = bj[jmin];
15709371c9d4SSatish Balay           jl[i] = jl[j];
15719371c9d4SSatish Balay           jl[j] = i;
1572653a6975SHong Zhang         }
1573653a6975SHong Zhang         i = nexti;
1574653a6975SHong Zhang       }
1575653a6975SHong Zhang 
15763cea4cbeSHong Zhang       /* shift the diagonals when zero pivot is detected */
15773cea4cbeSHong Zhang       /* compute rs=sum of abs(off-diagonal) */
15783cea4cbeSHong Zhang       rs   = 0.0;
157921c26570Svictorle       jmin = bi[k] + 1;
158021c26570Svictorle       nz   = bi[k + 1] - jmin;
158121c26570Svictorle       if (nz) {
158221c26570Svictorle         bcol = bj + jmin;
158321c26570Svictorle         while (nz--) {
158489f845c8SHong Zhang           rs += PetscAbsScalar(rtmp[*bcol]);
158589f845c8SHong Zhang           bcol++;
158621c26570Svictorle         }
158721c26570Svictorle       }
15883cea4cbeSHong Zhang 
15893cea4cbeSHong Zhang       sctx.rs = rs;
15903cea4cbeSHong Zhang       sctx.pv = dk;
15919566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
159207b50cabSHong Zhang       if (sctx.newshift) break; /* sctx.shift_amount is updated */
15930e95ead3SHong Zhang       dk = sctx.pv;
1594653a6975SHong Zhang 
1595997a0949SHong Zhang       /* copy data into U(k,:) */
1596653a6975SHong Zhang       ba[bi[k]] = 1.0 / dk;
1597653a6975SHong Zhang       jmin      = bi[k] + 1;
1598653a6975SHong Zhang       nz        = bi[k + 1] - jmin;
1599653a6975SHong Zhang       if (nz) {
1600653a6975SHong Zhang         bcol = bj + jmin;
1601653a6975SHong Zhang         bval = ba + jmin;
1602653a6975SHong Zhang         while (nz--) {
1603653a6975SHong Zhang           *bval++       = rtmp[*bcol];
1604653a6975SHong Zhang           rtmp[*bcol++] = 0.0;
1605653a6975SHong Zhang         }
1606997a0949SHong Zhang         /* add k-th row into il and jl */
1607653a6975SHong Zhang         il[k] = jmin;
16089371c9d4SSatish Balay         i     = bj[jmin];
16099371c9d4SSatish Balay         jl[k] = jl[i];
16109371c9d4SSatish Balay         jl[i] = k;
1611653a6975SHong Zhang       }
1612b00f7748SHong Zhang     } /* end of for (k = 0; k<mbs; k++) */
161307b50cabSHong Zhang   } while (sctx.newshift);
16149566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
16159566063dSJacob Faibussowitsch   PetscCall(PetscFree2(il, jl));
1616653a6975SHong Zhang 
16170a3c351aSHong Zhang   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
16184f79d315SHong Zhang   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
16190a3c351aSHong Zhang   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
16200a3c351aSHong Zhang   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
16210a3c351aSHong Zhang   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1622db4efbfdSBarry Smith 
1623653a6975SHong Zhang   C->assembled    = PETSC_TRUE;
1624653a6975SHong Zhang   C->preallocated = PETSC_TRUE;
162526fbe8dcSKarl Rupp 
16269566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->N));
16273cea4cbeSHong Zhang   if (sctx.nshift) {
1628f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
16299566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1630f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
16319566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1632b00f7748SHong Zhang     }
1633fb10cecfSBarry Smith   }
1634653a6975SHong Zhang   PetscFunctionReturn(0);
1635653a6975SHong Zhang }
1636653a6975SHong Zhang 
1637d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1638d71ae5a4SJacob Faibussowitsch {
163949b5e25fSSatish Balay   Mat C;
164049b5e25fSSatish Balay 
164149b5e25fSSatish Balay   PetscFunctionBegin;
16429566063dSJacob Faibussowitsch   PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
16439566063dSJacob Faibussowitsch   PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
16449566063dSJacob Faibussowitsch   PetscCall(MatCholeskyFactorNumeric(C, A, info));
164526fbe8dcSKarl Rupp 
1646db4efbfdSBarry Smith   A->ops->solve          = C->ops->solve;
1647db4efbfdSBarry Smith   A->ops->solvetranspose = C->ops->solvetranspose;
164826fbe8dcSKarl Rupp 
16499566063dSJacob Faibussowitsch   PetscCall(MatHeaderMerge(A, &C));
165049b5e25fSSatish Balay   PetscFunctionReturn(0);
165149b5e25fSSatish Balay }
1652