xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision ce6b3536d5fd46c99c6df19dc764ec34b4d2762c)
1be1d678aSKris Buschelman 
25d517e7eSBarry Smith /*
3ec3a8b7bSBarry Smith     Factorization code for BAIJ format.
45d517e7eSBarry Smith */
5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>
6af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
7ec3a8b7bSBarry Smith 
8d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat B, Mat A, const MatFactorInfo *info)
9d71ae5a4SJacob Faibussowitsch {
10b588c5a2SHong Zhang   Mat             C = B;
11b588c5a2SHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
12b588c5a2SHong Zhang   IS              isrow = b->row, isicol = b->icol;
135a586d82SBarry Smith   const PetscInt *r, *ic;
14bbd65245SShri Abhyankar   const PetscInt  n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, bs2 = a->bs2;
15bbd65245SShri Abhyankar   const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag;
16bbd65245SShri Abhyankar   MatScalar      *rtmp, *pc, *mwork, *pv;
17bbd65245SShri Abhyankar   MatScalar      *aa             = a->a, *v;
18182b8fbaSHong Zhang   PetscReal       shift          = info->shiftamount;
19ff6a9541SJacob Faibussowitsch   const PetscBool allowzeropivot = PetscNot(A->erroriffailure);
20b588c5a2SHong Zhang 
21b588c5a2SHong Zhang   PetscFunctionBegin;
229566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
239566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
24b588c5a2SHong Zhang 
254c000e2eSHong Zhang   /* generate work space needed by the factorization */
269566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork));
279566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(rtmp, bs2 * n));
28b588c5a2SHong Zhang 
29ff6a9541SJacob Faibussowitsch   for (PetscInt i = 0; i < n; i++) {
30b588c5a2SHong Zhang     /* zero rtmp */
31b588c5a2SHong Zhang     /* L part */
32ff6a9541SJacob Faibussowitsch     PetscInt *pj;
33ff6a9541SJacob Faibussowitsch     PetscInt  nzL, nz = bi[i + 1] - bi[i];
34b588c5a2SHong Zhang     bjtmp = bj + bi[i];
35ff6a9541SJacob Faibussowitsch     for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2));
36b588c5a2SHong Zhang 
37b588c5a2SHong Zhang     /* U part */
380c4413a7SShri Abhyankar     nz    = bdiag[i] - bdiag[i + 1];
390c4413a7SShri Abhyankar     bjtmp = bj + bdiag[i + 1] + 1;
40ff6a9541SJacob Faibussowitsch     for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2));
410c4413a7SShri Abhyankar 
420c4413a7SShri Abhyankar     /* load in initial (unfactored row) */
430c4413a7SShri Abhyankar     nz    = ai[r[i] + 1] - ai[r[i]];
440c4413a7SShri Abhyankar     ajtmp = aj + ai[r[i]];
450c4413a7SShri Abhyankar     v     = aa + bs2 * ai[r[i]];
46ff6a9541SJacob Faibussowitsch     for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ic[ajtmp[j]], v + bs2 * j, bs2));
470c4413a7SShri Abhyankar 
480c4413a7SShri Abhyankar     /* elimination */
490c4413a7SShri Abhyankar     bjtmp = bj + bi[i];
500c4413a7SShri Abhyankar     nzL   = bi[i + 1] - bi[i];
51ff6a9541SJacob Faibussowitsch     for (PetscInt k = 0; k < nzL; k++) {
52ff6a9541SJacob Faibussowitsch       PetscBool flg = PETSC_FALSE;
53ff6a9541SJacob Faibussowitsch       PetscInt  row = bjtmp[k];
54ff6a9541SJacob Faibussowitsch 
550c4413a7SShri Abhyankar       pc = rtmp + bs2 * row;
56ff6a9541SJacob Faibussowitsch       for (PetscInt j = 0; j < bs2; j++) {
57c35f09e5SBarry Smith         if (pc[j] != (PetscScalar)0.0) {
58ff6a9541SJacob Faibussowitsch           flg = PETSC_TRUE;
59c35f09e5SBarry Smith           break;
60c35f09e5SBarry Smith         }
61c35f09e5SBarry Smith       }
620c4413a7SShri Abhyankar       if (flg) {
630c4413a7SShri Abhyankar         pv = b->a + bs2 * bdiag[row];
6496b95a6bSBarry Smith         /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */
659566063dSJacob Faibussowitsch         PetscCall(PetscKernel_A_gets_A_times_B_2(pc, pv, mwork));
660c4413a7SShri Abhyankar 
67a5b23f4aSJose E. Roman         pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
680c4413a7SShri Abhyankar         pv = b->a + bs2 * (bdiag[row + 1] + 1);
690c4413a7SShri Abhyankar         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries inU(row,:), excluding diag */
70ff6a9541SJacob Faibussowitsch         for (PetscInt j = 0; j < nz; j++) {
7196b95a6bSBarry Smith           /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */
720c4413a7SShri Abhyankar           /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */
730c4413a7SShri Abhyankar           v = rtmp + 4 * pj[j];
749566063dSJacob Faibussowitsch           PetscCall(PetscKernel_A_gets_A_minus_B_times_C_2(v, pc, pv));
750c4413a7SShri Abhyankar           pv += 4;
760c4413a7SShri Abhyankar         }
779566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(16.0 * nz + 12)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */
780c4413a7SShri Abhyankar       }
790c4413a7SShri Abhyankar     }
800c4413a7SShri Abhyankar 
810c4413a7SShri Abhyankar     /* finished row so stick it into b->a */
820c4413a7SShri Abhyankar     /* L part */
830c4413a7SShri Abhyankar     pv = b->a + bs2 * bi[i];
840c4413a7SShri Abhyankar     pj = b->j + bi[i];
850c4413a7SShri Abhyankar     nz = bi[i + 1] - bi[i];
86ff6a9541SJacob Faibussowitsch     for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2));
870c4413a7SShri Abhyankar 
88a5b23f4aSJose E. Roman     /* Mark diagonal and invert diagonal for simpler triangular solves */
890c4413a7SShri Abhyankar     pv = b->a + bs2 * bdiag[i];
900c4413a7SShri Abhyankar     pj = b->j + bdiag[i];
919566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2));
92ff6a9541SJacob Faibussowitsch     {
93ff6a9541SJacob Faibussowitsch       PetscBool zeropivotdetected;
94ff6a9541SJacob Faibussowitsch 
959566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_2(pv, shift, allowzeropivot, &zeropivotdetected));
967b6c816cSBarry Smith       if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
97ff6a9541SJacob Faibussowitsch     }
980c4413a7SShri Abhyankar 
990c4413a7SShri Abhyankar     /* U part */
1000c4413a7SShri Abhyankar     pv = b->a + bs2 * (bdiag[i + 1] + 1);
1010c4413a7SShri Abhyankar     pj = b->j + bdiag[i + 1] + 1;
1020c4413a7SShri Abhyankar     nz = bdiag[i] - bdiag[i + 1] - 1;
103ff6a9541SJacob Faibussowitsch     for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2));
1040c4413a7SShri Abhyankar   }
1050c4413a7SShri Abhyankar 
1069566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rtmp, mwork));
1079566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
1089566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
10926fbe8dcSKarl Rupp 
1104dd39f65SShri Abhyankar   C->ops->solve          = MatSolve_SeqBAIJ_2;
1114dd39f65SShri Abhyankar   C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2;
1120c4413a7SShri Abhyankar   C->assembled           = PETSC_TRUE;
11326fbe8dcSKarl Rupp 
1149566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(1.333333333333 * 2 * 2 * 2 * n)); /* from inverting diagonal blocks */
1153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1160c4413a7SShri Abhyankar }
1170c4413a7SShri Abhyankar 
118d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
119d71ae5a4SJacob Faibussowitsch {
1204c000e2eSHong Zhang   Mat             C = B;
1214c000e2eSHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
122bbd65245SShri Abhyankar   PetscInt        i, j, k, nz, nzL, row, *pj;
123bbd65245SShri Abhyankar   const PetscInt  n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, bs2 = a->bs2;
124bbd65245SShri Abhyankar   const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag;
125bbd65245SShri Abhyankar   MatScalar      *rtmp, *pc, *mwork, *pv;
126bbd65245SShri Abhyankar   MatScalar      *aa = a->a, *v;
127bbd65245SShri Abhyankar   PetscInt        flg;
128182b8fbaSHong Zhang   PetscReal       shift = info->shiftamount;
129a455e926SHong Zhang   PetscBool       allowzeropivot, zeropivotdetected;
1304c000e2eSHong Zhang 
1314c000e2eSHong Zhang   PetscFunctionBegin;
1320164db54SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
1330164db54SHong Zhang 
1344c000e2eSHong Zhang   /* generate work space needed by the factorization */
1359566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork));
1369566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(rtmp, bs2 * n));
1374c000e2eSHong Zhang 
1384c000e2eSHong Zhang   for (i = 0; i < n; i++) {
1394c000e2eSHong Zhang     /* zero rtmp */
1404c000e2eSHong Zhang     /* L part */
1414c000e2eSHong Zhang     nz    = bi[i + 1] - bi[i];
1424c000e2eSHong Zhang     bjtmp = bj + bi[i];
14348a46eb9SPierre Jolivet     for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2));
1444c000e2eSHong Zhang 
1454c000e2eSHong Zhang     /* U part */
146b2b2dd24SShri Abhyankar     nz    = bdiag[i] - bdiag[i + 1];
147b2b2dd24SShri Abhyankar     bjtmp = bj + bdiag[i + 1] + 1;
14848a46eb9SPierre Jolivet     for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2));
149b2b2dd24SShri Abhyankar 
150b2b2dd24SShri Abhyankar     /* load in initial (unfactored row) */
151b2b2dd24SShri Abhyankar     nz    = ai[i + 1] - ai[i];
152b2b2dd24SShri Abhyankar     ajtmp = aj + ai[i];
153b2b2dd24SShri Abhyankar     v     = aa + bs2 * ai[i];
15448a46eb9SPierre Jolivet     for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ajtmp[j], v + bs2 * j, bs2));
155b2b2dd24SShri Abhyankar 
156b2b2dd24SShri Abhyankar     /* elimination */
157b2b2dd24SShri Abhyankar     bjtmp = bj + bi[i];
158b2b2dd24SShri Abhyankar     nzL   = bi[i + 1] - bi[i];
159b2b2dd24SShri Abhyankar     for (k = 0; k < nzL; k++) {
160b2b2dd24SShri Abhyankar       row = bjtmp[k];
161b2b2dd24SShri Abhyankar       pc  = rtmp + bs2 * row;
162c35f09e5SBarry Smith       for (flg = 0, j = 0; j < bs2; j++) {
163c35f09e5SBarry Smith         if (pc[j] != (PetscScalar)0.0) {
164c35f09e5SBarry Smith           flg = 1;
165c35f09e5SBarry Smith           break;
166c35f09e5SBarry Smith         }
167c35f09e5SBarry Smith       }
168b2b2dd24SShri Abhyankar       if (flg) {
169b2b2dd24SShri Abhyankar         pv = b->a + bs2 * bdiag[row];
17096b95a6bSBarry Smith         /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */
1719566063dSJacob Faibussowitsch         PetscCall(PetscKernel_A_gets_A_times_B_2(pc, pv, mwork));
172b2b2dd24SShri Abhyankar 
173b2b2dd24SShri Abhyankar         pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
174b2b2dd24SShri Abhyankar         pv = b->a + bs2 * (bdiag[row + 1] + 1);
175b2b2dd24SShri Abhyankar         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
176b2b2dd24SShri Abhyankar         for (j = 0; j < nz; j++) {
17796b95a6bSBarry Smith           /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */
178b2b2dd24SShri Abhyankar           /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */
179b2b2dd24SShri Abhyankar           v = rtmp + 4 * pj[j];
1809566063dSJacob Faibussowitsch           PetscCall(PetscKernel_A_gets_A_minus_B_times_C_2(v, pc, pv));
181b2b2dd24SShri Abhyankar           pv += 4;
182b2b2dd24SShri Abhyankar         }
1839566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(16.0 * nz + 12)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */
184b2b2dd24SShri Abhyankar       }
185b2b2dd24SShri Abhyankar     }
186b2b2dd24SShri Abhyankar 
187b2b2dd24SShri Abhyankar     /* finished row so stick it into b->a */
188b2b2dd24SShri Abhyankar     /* L part */
189b2b2dd24SShri Abhyankar     pv = b->a + bs2 * bi[i];
190b2b2dd24SShri Abhyankar     pj = b->j + bi[i];
191b2b2dd24SShri Abhyankar     nz = bi[i + 1] - bi[i];
19248a46eb9SPierre Jolivet     for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2));
193b2b2dd24SShri Abhyankar 
194a5b23f4aSJose E. Roman     /* Mark diagonal and invert diagonal for simpler triangular solves */
195b2b2dd24SShri Abhyankar     pv = b->a + bs2 * bdiag[i];
196b2b2dd24SShri Abhyankar     pj = b->j + bdiag[i];
1979566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2));
1989566063dSJacob Faibussowitsch     PetscCall(PetscKernel_A_gets_inverse_A_2(pv, shift, allowzeropivot, &zeropivotdetected));
1997b6c816cSBarry Smith     if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
200b2b2dd24SShri Abhyankar 
201b2b2dd24SShri Abhyankar     /* U part */
202b2b2dd24SShri Abhyankar     /*
203b2b2dd24SShri Abhyankar     pv = b->a + bs2*bi[2*n-i];
204b2b2dd24SShri Abhyankar     pj = b->j + bi[2*n-i];
205b2b2dd24SShri Abhyankar     nz = bi[2*n-i+1] - bi[2*n-i] - 1;
206b2b2dd24SShri Abhyankar     */
207b2b2dd24SShri Abhyankar     pv = b->a + bs2 * (bdiag[i + 1] + 1);
208b2b2dd24SShri Abhyankar     pj = b->j + bdiag[i + 1] + 1;
209b2b2dd24SShri Abhyankar     nz = bdiag[i] - bdiag[i + 1] - 1;
21048a46eb9SPierre Jolivet     for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2));
211b2b2dd24SShri Abhyankar   }
2129566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rtmp, mwork));
213ae3d28f0SHong Zhang 
2144dd39f65SShri Abhyankar   C->ops->solve          = MatSolve_SeqBAIJ_2_NaturalOrdering;
2159f5c0bcdSBarry Smith   C->ops->forwardsolve   = MatForwardSolve_SeqBAIJ_2_NaturalOrdering;
2169f5c0bcdSBarry Smith   C->ops->backwardsolve  = MatBackwardSolve_SeqBAIJ_2_NaturalOrdering;
2174dd39f65SShri Abhyankar   C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
218b2b2dd24SShri Abhyankar   C->assembled           = PETSC_TRUE;
21926fbe8dcSKarl Rupp 
2209566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(1.333333333333 * 2 * 2 * 2 * n)); /* from inverting diagonal blocks */
2213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
222b2b2dd24SShri Abhyankar }
223b2b2dd24SShri Abhyankar 
224d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_inplace(Mat B, Mat A, const MatFactorInfo *info)
225d71ae5a4SJacob Faibussowitsch {
226719d5645SBarry Smith   Mat             C = B;
2274eeb42bcSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
2287cdf2dbbSSatish Balay   IS              isrow = b->row, isicol = b->icol;
2295d0c19d7SBarry Smith   const PetscInt *r, *ic;
2305d0c19d7SBarry Smith   PetscInt        i, j, n = a->mbs, *bi = b->i, *bj = b->j;
231690b6cddSBarry Smith   PetscInt       *ajtmpold, *ajtmp, nz, row;
232690b6cddSBarry Smith   PetscInt       *diag_offset = b->diag, idx, *ai = a->i, *aj = a->j, *pj;
233329f5518SBarry Smith   MatScalar      *pv, *v, *rtmp, m1, m2, m3, m4, *pc, *w, *x, x1, x2, x3, x4;
2342515f8d2SSatish Balay   MatScalar       p1, p2, p3, p4;
2353f1db9ecSBarry Smith   MatScalar      *ba = b->a, *aa = a->a;
236182b8fbaSHong Zhang   PetscReal       shift = info->shiftamount;
237a455e926SHong Zhang   PetscBool       allowzeropivot, zeropivotdetected;
2384eeb42bcSBarry Smith 
2393a40ed3dSBarry Smith   PetscFunctionBegin;
2400164db54SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
2419566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
2429566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
2439566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(4 * (n + 1), &rtmp));
2444eeb42bcSBarry Smith 
2454eeb42bcSBarry Smith   for (i = 0; i < n; i++) {
2464078e994SBarry Smith     nz    = bi[i + 1] - bi[i];
2474078e994SBarry Smith     ajtmp = bj + bi[i];
2484eeb42bcSBarry Smith     for (j = 0; j < nz; j++) {
2499371c9d4SSatish Balay       x    = rtmp + 4 * ajtmp[j];
2509371c9d4SSatish Balay       x[0] = x[1] = x[2] = x[3] = 0.0;
2514eeb42bcSBarry Smith     }
2524eeb42bcSBarry Smith     /* load in initial (unfactored row) */
2534eeb42bcSBarry Smith     idx      = r[i];
2544078e994SBarry Smith     nz       = ai[idx + 1] - ai[idx];
2554078e994SBarry Smith     ajtmpold = aj + ai[idx];
2564078e994SBarry Smith     v        = aa + 4 * ai[idx];
2574eeb42bcSBarry Smith     for (j = 0; j < nz; j++) {
2584eeb42bcSBarry Smith       x    = rtmp + 4 * ic[ajtmpold[j]];
2599371c9d4SSatish Balay       x[0] = v[0];
2609371c9d4SSatish Balay       x[1] = v[1];
2619371c9d4SSatish Balay       x[2] = v[2];
2629371c9d4SSatish Balay       x[3] = v[3];
2634eeb42bcSBarry Smith       v += 4;
2644eeb42bcSBarry Smith     }
2654eeb42bcSBarry Smith     row = *ajtmp++;
2664eeb42bcSBarry Smith     while (row < i) {
2674eeb42bcSBarry Smith       pc = rtmp + 4 * row;
2689371c9d4SSatish Balay       p1 = pc[0];
2699371c9d4SSatish Balay       p2 = pc[1];
2709371c9d4SSatish Balay       p3 = pc[2];
2719371c9d4SSatish Balay       p4 = pc[3];
272d4a378daSJed Brown       if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) {
2734078e994SBarry Smith         pv    = ba + 4 * diag_offset[row];
2744078e994SBarry Smith         pj    = bj + diag_offset[row] + 1;
2759371c9d4SSatish Balay         x1    = pv[0];
2769371c9d4SSatish Balay         x2    = pv[1];
2779371c9d4SSatish Balay         x3    = pv[2];
2789371c9d4SSatish Balay         x4    = pv[3];
2794eeb42bcSBarry Smith         pc[0] = m1 = p1 * x1 + p3 * x2;
2804eeb42bcSBarry Smith         pc[1] = m2 = p2 * x1 + p4 * x2;
2814eeb42bcSBarry Smith         pc[2] = m3 = p1 * x3 + p3 * x4;
2824eeb42bcSBarry Smith         pc[3] = m4 = p2 * x3 + p4 * x4;
2834078e994SBarry Smith         nz         = bi[row + 1] - diag_offset[row] - 1;
2844eeb42bcSBarry Smith         pv += 4;
2854eeb42bcSBarry Smith         for (j = 0; j < nz; j++) {
2869371c9d4SSatish Balay           x1 = pv[0];
2879371c9d4SSatish Balay           x2 = pv[1];
2889371c9d4SSatish Balay           x3 = pv[2];
2899371c9d4SSatish Balay           x4 = pv[3];
2904eeb42bcSBarry Smith           x  = rtmp + 4 * pj[j];
2914eeb42bcSBarry Smith           x[0] -= m1 * x1 + m3 * x2;
2924eeb42bcSBarry Smith           x[1] -= m2 * x1 + m4 * x2;
2934eeb42bcSBarry Smith           x[2] -= m1 * x3 + m3 * x4;
2944eeb42bcSBarry Smith           x[3] -= m2 * x3 + m4 * x4;
2954eeb42bcSBarry Smith           pv += 4;
2964eeb42bcSBarry Smith         }
2979566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(16.0 * nz + 12.0));
2984eeb42bcSBarry Smith       }
2994eeb42bcSBarry Smith       row = *ajtmp++;
3004eeb42bcSBarry Smith     }
3014eeb42bcSBarry Smith     /* finished row so stick it into b->a */
3024078e994SBarry Smith     pv = ba + 4 * bi[i];
3034078e994SBarry Smith     pj = bj + bi[i];
3044078e994SBarry Smith     nz = bi[i + 1] - bi[i];
3054eeb42bcSBarry Smith     for (j = 0; j < nz; j++) {
3064eeb42bcSBarry Smith       x     = rtmp + 4 * pj[j];
3079371c9d4SSatish Balay       pv[0] = x[0];
3089371c9d4SSatish Balay       pv[1] = x[1];
3099371c9d4SSatish Balay       pv[2] = x[2];
3109371c9d4SSatish Balay       pv[3] = x[3];
3114eeb42bcSBarry Smith       pv += 4;
3124eeb42bcSBarry Smith     }
3134eeb42bcSBarry Smith     /* invert diagonal block */
3144078e994SBarry Smith     w = ba + 4 * diag_offset[i];
3159566063dSJacob Faibussowitsch     PetscCall(PetscKernel_A_gets_inverse_A_2(w, shift, allowzeropivot, &zeropivotdetected));
3167b6c816cSBarry Smith     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3174eeb42bcSBarry Smith   }
3184eeb42bcSBarry Smith 
3199566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
3209566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
3219566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
32226fbe8dcSKarl Rupp 
32306e38f1dSHong Zhang   C->ops->solve          = MatSolve_SeqBAIJ_2_inplace;
32406e38f1dSHong Zhang   C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_inplace;
3254eeb42bcSBarry Smith   C->assembled           = PETSC_TRUE;
32626fbe8dcSKarl Rupp 
3279566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(1.333333333333 * 8 * b->mbs)); /* from inverting diagonal blocks */
3283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3294eeb42bcSBarry Smith }
3304cd38560SBarry Smith /*
3314cd38560SBarry Smith       Version for when blocks are 2 by 2 Using natural ordering
3324cd38560SBarry Smith */
333d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
334d71ae5a4SJacob Faibussowitsch {
3354cd38560SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
336690b6cddSBarry Smith   PetscInt     i, j, n = a->mbs, *bi = b->i, *bj = b->j;
337690b6cddSBarry Smith   PetscInt    *ajtmpold, *ajtmp, nz, row;
338690b6cddSBarry Smith   PetscInt    *diag_offset = b->diag, *ai = a->i, *aj = a->j, *pj;
339329f5518SBarry Smith   MatScalar   *pv, *v, *rtmp, *pc, *w, *x;
3402515f8d2SSatish Balay   MatScalar    p1, p2, p3, p4, m1, m2, m3, m4, x1, x2, x3, x4;
3414cd38560SBarry Smith   MatScalar   *ba = b->a, *aa = a->a;
342182b8fbaSHong Zhang   PetscReal    shift = info->shiftamount;
343a455e926SHong Zhang   PetscBool    allowzeropivot, zeropivotdetected;
3444cd38560SBarry Smith 
3454cd38560SBarry Smith   PetscFunctionBegin;
3460164db54SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
3479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(4 * (n + 1), &rtmp));
3484cd38560SBarry Smith   for (i = 0; i < n; i++) {
3494cd38560SBarry Smith     nz    = bi[i + 1] - bi[i];
3504cd38560SBarry Smith     ajtmp = bj + bi[i];
3514cd38560SBarry Smith     for (j = 0; j < nz; j++) {
3524cd38560SBarry Smith       x    = rtmp + 4 * ajtmp[j];
3534cd38560SBarry Smith       x[0] = x[1] = x[2] = x[3] = 0.0;
3544cd38560SBarry Smith     }
3554cd38560SBarry Smith     /* load in initial (unfactored row) */
3564cd38560SBarry Smith     nz       = ai[i + 1] - ai[i];
3574cd38560SBarry Smith     ajtmpold = aj + ai[i];
3584cd38560SBarry Smith     v        = aa + 4 * ai[i];
3594cd38560SBarry Smith     for (j = 0; j < nz; j++) {
3604cd38560SBarry Smith       x    = rtmp + 4 * ajtmpold[j];
3619371c9d4SSatish Balay       x[0] = v[0];
3629371c9d4SSatish Balay       x[1] = v[1];
3639371c9d4SSatish Balay       x[2] = v[2];
3649371c9d4SSatish Balay       x[3] = v[3];
3654cd38560SBarry Smith       v += 4;
3664cd38560SBarry Smith     }
3674cd38560SBarry Smith     row = *ajtmp++;
3684cd38560SBarry Smith     while (row < i) {
3694cd38560SBarry Smith       pc = rtmp + 4 * row;
3709371c9d4SSatish Balay       p1 = pc[0];
3719371c9d4SSatish Balay       p2 = pc[1];
3729371c9d4SSatish Balay       p3 = pc[2];
3739371c9d4SSatish Balay       p4 = pc[3];
374d4a378daSJed Brown       if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) {
3754cd38560SBarry Smith         pv    = ba + 4 * diag_offset[row];
3764cd38560SBarry Smith         pj    = bj + diag_offset[row] + 1;
3779371c9d4SSatish Balay         x1    = pv[0];
3789371c9d4SSatish Balay         x2    = pv[1];
3799371c9d4SSatish Balay         x3    = pv[2];
3809371c9d4SSatish Balay         x4    = pv[3];
3814cd38560SBarry Smith         pc[0] = m1 = p1 * x1 + p3 * x2;
3824cd38560SBarry Smith         pc[1] = m2 = p2 * x1 + p4 * x2;
3834cd38560SBarry Smith         pc[2] = m3 = p1 * x3 + p3 * x4;
3844cd38560SBarry Smith         pc[3] = m4 = p2 * x3 + p4 * x4;
3854cd38560SBarry Smith         nz         = bi[row + 1] - diag_offset[row] - 1;
3864cd38560SBarry Smith         pv += 4;
3874cd38560SBarry Smith         for (j = 0; j < nz; j++) {
3889371c9d4SSatish Balay           x1 = pv[0];
3899371c9d4SSatish Balay           x2 = pv[1];
3909371c9d4SSatish Balay           x3 = pv[2];
3919371c9d4SSatish Balay           x4 = pv[3];
3924cd38560SBarry Smith           x  = rtmp + 4 * pj[j];
3934cd38560SBarry Smith           x[0] -= m1 * x1 + m3 * x2;
3944cd38560SBarry Smith           x[1] -= m2 * x1 + m4 * x2;
3954cd38560SBarry Smith           x[2] -= m1 * x3 + m3 * x4;
3964cd38560SBarry Smith           x[3] -= m2 * x3 + m4 * x4;
3974cd38560SBarry Smith           pv += 4;
3984cd38560SBarry Smith         }
3999566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(16.0 * nz + 12.0));
4004cd38560SBarry Smith       }
4014cd38560SBarry Smith       row = *ajtmp++;
4024cd38560SBarry Smith     }
4034cd38560SBarry Smith     /* finished row so stick it into b->a */
4044cd38560SBarry Smith     pv = ba + 4 * bi[i];
4054cd38560SBarry Smith     pj = bj + bi[i];
4064cd38560SBarry Smith     nz = bi[i + 1] - bi[i];
4074cd38560SBarry Smith     for (j = 0; j < nz; j++) {
4084cd38560SBarry Smith       x     = rtmp + 4 * pj[j];
4099371c9d4SSatish Balay       pv[0] = x[0];
4109371c9d4SSatish Balay       pv[1] = x[1];
4119371c9d4SSatish Balay       pv[2] = x[2];
4129371c9d4SSatish Balay       pv[3] = x[3];
4132efa7f71SHong Zhang       /*
4142efa7f71SHong Zhang       printf(" col %d:",pj[j]);
4152efa7f71SHong Zhang       PetscInt j1;
4162efa7f71SHong Zhang       for (j1=0; j1<4; j1++) printf(" %g,",*(pv+j1));
4172efa7f71SHong Zhang       printf("\n");
4182efa7f71SHong Zhang       */
4194cd38560SBarry Smith       pv += 4;
4204cd38560SBarry Smith     }
4214cd38560SBarry Smith     /* invert diagonal block */
4224cd38560SBarry Smith     w = ba + 4 * diag_offset[i];
4239566063dSJacob Faibussowitsch     PetscCall(PetscKernel_A_gets_inverse_A_2(w, shift, allowzeropivot, &zeropivotdetected));
4247b6c816cSBarry Smith     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
4254cd38560SBarry Smith   }
4264cd38560SBarry Smith 
4279566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
42826fbe8dcSKarl Rupp 
42906e38f1dSHong Zhang   C->ops->solve          = MatSolve_SeqBAIJ_2_NaturalOrdering_inplace;
43006e38f1dSHong Zhang   C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_inplace;
4314cd38560SBarry Smith   C->assembled           = PETSC_TRUE;
43226fbe8dcSKarl Rupp 
4339566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(1.333333333333 * 8 * b->mbs)); /* from inverting diagonal blocks */
4343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4354cd38560SBarry Smith }
4367fc0212eSBarry Smith 
4377fc0212eSBarry Smith /*
4387fc0212eSBarry Smith      Version for when blocks are 1 by 1.
4397fc0212eSBarry Smith */
440d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat B, Mat A, const MatFactorInfo *info)
441d71ae5a4SJacob Faibussowitsch {
442048b5e81SShri Abhyankar   Mat              C = B;
443048b5e81SShri Abhyankar   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
444048b5e81SShri Abhyankar   IS               isrow = b->row, isicol = b->icol;
445048b5e81SShri Abhyankar   const PetscInt  *r, *ic, *ics;
446048b5e81SShri Abhyankar   const PetscInt   n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
447048b5e81SShri Abhyankar   PetscInt         i, j, k, nz, nzL, row, *pj;
448048b5e81SShri Abhyankar   const PetscInt  *ajtmp, *bjtmp;
449048b5e81SShri Abhyankar   MatScalar       *rtmp, *pc, multiplier, *pv;
450048b5e81SShri Abhyankar   const MatScalar *aa = a->a, *v;
451ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
452048b5e81SShri Abhyankar   FactorShiftCtx   sctx;
453048b5e81SShri Abhyankar   const PetscInt  *ddiag;
454048b5e81SShri Abhyankar   PetscReal        rs;
455048b5e81SShri Abhyankar   MatScalar        d;
456048b5e81SShri Abhyankar 
457048b5e81SShri Abhyankar   PetscFunctionBegin;
458048b5e81SShri Abhyankar   /* MatPivotSetUp(): initialize shift context sctx */
4599566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
460048b5e81SShri Abhyankar 
461048b5e81SShri Abhyankar   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
462048b5e81SShri Abhyankar     ddiag          = a->diag;
463048b5e81SShri Abhyankar     sctx.shift_top = info->zeropivot;
464048b5e81SShri Abhyankar     for (i = 0; i < n; i++) {
465048b5e81SShri Abhyankar       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
466048b5e81SShri Abhyankar       d  = (aa)[ddiag[i]];
467048b5e81SShri Abhyankar       rs = -PetscAbsScalar(d) - PetscRealPart(d);
468048b5e81SShri Abhyankar       v  = aa + ai[i];
469048b5e81SShri Abhyankar       nz = ai[i + 1] - ai[i];
47026fbe8dcSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
471048b5e81SShri Abhyankar       if (rs > sctx.shift_top) sctx.shift_top = rs;
472048b5e81SShri Abhyankar     }
473048b5e81SShri Abhyankar     sctx.shift_top *= 1.1;
474048b5e81SShri Abhyankar     sctx.nshift_max = 5;
475048b5e81SShri Abhyankar     sctx.shift_lo   = 0.;
476048b5e81SShri Abhyankar     sctx.shift_hi   = 1.;
477048b5e81SShri Abhyankar   }
478048b5e81SShri Abhyankar 
4799566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
4809566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
4819566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
482048b5e81SShri Abhyankar   ics = ic;
483048b5e81SShri Abhyankar 
484048b5e81SShri Abhyankar   do {
485048b5e81SShri Abhyankar     sctx.newshift = PETSC_FALSE;
486048b5e81SShri Abhyankar     for (i = 0; i < n; i++) {
487048b5e81SShri Abhyankar       /* zero rtmp */
488048b5e81SShri Abhyankar       /* L part */
489048b5e81SShri Abhyankar       nz    = bi[i + 1] - bi[i];
490048b5e81SShri Abhyankar       bjtmp = bj + bi[i];
491048b5e81SShri Abhyankar       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
492048b5e81SShri Abhyankar 
493048b5e81SShri Abhyankar       /* U part */
494048b5e81SShri Abhyankar       nz    = bdiag[i] - bdiag[i + 1];
495048b5e81SShri Abhyankar       bjtmp = bj + bdiag[i + 1] + 1;
496048b5e81SShri Abhyankar       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
497048b5e81SShri Abhyankar 
498048b5e81SShri Abhyankar       /* load in initial (unfactored row) */
499048b5e81SShri Abhyankar       nz    = ai[r[i] + 1] - ai[r[i]];
500048b5e81SShri Abhyankar       ajtmp = aj + ai[r[i]];
501048b5e81SShri Abhyankar       v     = aa + ai[r[i]];
50226fbe8dcSKarl Rupp       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
50326fbe8dcSKarl Rupp 
504048b5e81SShri Abhyankar       /* ZeropivotApply() */
505048b5e81SShri Abhyankar       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
506048b5e81SShri Abhyankar 
507048b5e81SShri Abhyankar       /* elimination */
508048b5e81SShri Abhyankar       bjtmp = bj + bi[i];
509048b5e81SShri Abhyankar       row   = *bjtmp++;
510048b5e81SShri Abhyankar       nzL   = bi[i + 1] - bi[i];
511048b5e81SShri Abhyankar       for (k = 0; k < nzL; k++) {
512048b5e81SShri Abhyankar         pc = rtmp + row;
513d4a378daSJed Brown         if (*pc != (PetscScalar)0.0) {
514048b5e81SShri Abhyankar           pv         = b->a + bdiag[row];
515048b5e81SShri Abhyankar           multiplier = *pc * (*pv);
516048b5e81SShri Abhyankar           *pc        = multiplier;
51726fbe8dcSKarl Rupp 
518048b5e81SShri Abhyankar           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
519048b5e81SShri Abhyankar           pv = b->a + bdiag[row + 1] + 1;
520048b5e81SShri Abhyankar           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
521048b5e81SShri Abhyankar           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
5229566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(2.0 * nz));
523048b5e81SShri Abhyankar         }
524048b5e81SShri Abhyankar         row = *bjtmp++;
525048b5e81SShri Abhyankar       }
526048b5e81SShri Abhyankar 
527048b5e81SShri Abhyankar       /* finished row so stick it into b->a */
528048b5e81SShri Abhyankar       rs = 0.0;
529048b5e81SShri Abhyankar       /* L part */
530048b5e81SShri Abhyankar       pv = b->a + bi[i];
531048b5e81SShri Abhyankar       pj = b->j + bi[i];
532048b5e81SShri Abhyankar       nz = bi[i + 1] - bi[i];
533048b5e81SShri Abhyankar       for (j = 0; j < nz; j++) {
5349371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
5359371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
536048b5e81SShri Abhyankar       }
537048b5e81SShri Abhyankar 
538048b5e81SShri Abhyankar       /* U part */
539048b5e81SShri Abhyankar       pv = b->a + bdiag[i + 1] + 1;
540048b5e81SShri Abhyankar       pj = b->j + bdiag[i + 1] + 1;
541048b5e81SShri Abhyankar       nz = bdiag[i] - bdiag[i + 1] - 1;
542048b5e81SShri Abhyankar       for (j = 0; j < nz; j++) {
5439371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
5449371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
545048b5e81SShri Abhyankar       }
546048b5e81SShri Abhyankar 
547048b5e81SShri Abhyankar       sctx.rs = rs;
548048b5e81SShri Abhyankar       sctx.pv = rtmp[i];
5499566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
550048b5e81SShri Abhyankar       if (sctx.newshift) break; /* break for-loop */
551048b5e81SShri Abhyankar       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
552048b5e81SShri Abhyankar 
553a5b23f4aSJose E. Roman       /* Mark diagonal and invert diagonal for simpler triangular solves */
554048b5e81SShri Abhyankar       pv  = b->a + bdiag[i];
555d4a378daSJed Brown       *pv = (PetscScalar)1.0 / rtmp[i];
556048b5e81SShri Abhyankar 
557048b5e81SShri Abhyankar     } /* endof for (i=0; i<n; i++) { */
558048b5e81SShri Abhyankar 
559048b5e81SShri Abhyankar     /* MatPivotRefine() */
560048b5e81SShri Abhyankar     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
561048b5e81SShri Abhyankar       /*
562048b5e81SShri Abhyankar        * if no shift in this attempt & shifting & started shifting & can refine,
563048b5e81SShri Abhyankar        * then try lower shift
564048b5e81SShri Abhyankar        */
565048b5e81SShri Abhyankar       sctx.shift_hi       = sctx.shift_fraction;
566048b5e81SShri Abhyankar       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
567048b5e81SShri Abhyankar       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
568048b5e81SShri Abhyankar       sctx.newshift       = PETSC_TRUE;
569048b5e81SShri Abhyankar       sctx.nshift++;
570048b5e81SShri Abhyankar     }
571048b5e81SShri Abhyankar   } while (sctx.newshift);
572048b5e81SShri Abhyankar 
5739566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
5749566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
5759566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
576048b5e81SShri Abhyankar 
5779566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
5789566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
579048b5e81SShri Abhyankar   if (row_identity && col_identity) {
580048b5e81SShri Abhyankar     C->ops->solve          = MatSolve_SeqBAIJ_1_NaturalOrdering;
5819f5c0bcdSBarry Smith     C->ops->forwardsolve   = MatForwardSolve_SeqBAIJ_1_NaturalOrdering;
5829f5c0bcdSBarry Smith     C->ops->backwardsolve  = MatBackwardSolve_SeqBAIJ_1_NaturalOrdering;
58393fd935bSShri Abhyankar     C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering;
584048b5e81SShri Abhyankar   } else {
585048b5e81SShri Abhyankar     C->ops->solve          = MatSolve_SeqBAIJ_1;
58693fd935bSShri Abhyankar     C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1;
587048b5e81SShri Abhyankar   }
588048b5e81SShri Abhyankar   C->assembled = PETSC_TRUE;
5899566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
590048b5e81SShri Abhyankar 
591048b5e81SShri Abhyankar   /* MatShiftView(A,info,&sctx) */
592048b5e81SShri Abhyankar   if (sctx.nshift) {
593048b5e81SShri Abhyankar     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
5949566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
595048b5e81SShri Abhyankar     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
5969566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
597048b5e81SShri Abhyankar     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
5989566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
599048b5e81SShri Abhyankar     }
600048b5e81SShri Abhyankar   }
6013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
602048b5e81SShri Abhyankar }
603048b5e81SShri Abhyankar 
604d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
605d71ae5a4SJacob Faibussowitsch {
6067fc0212eSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
6077cdf2dbbSSatish Balay   IS              isrow = b->row, isicol = b->icol;
6085d0c19d7SBarry Smith   const PetscInt *r, *ic;
6095d0c19d7SBarry Smith   PetscInt        i, j, n = a->mbs, *bi = b->i, *bj = b->j;
610690b6cddSBarry Smith   PetscInt       *ajtmpold, *ajtmp, nz, row, *ai = a->i, *aj = a->j;
611690b6cddSBarry Smith   PetscInt       *diag_offset = b->diag, diag, *pj;
612329f5518SBarry Smith   MatScalar      *pv, *v, *rtmp, multiplier, *pc;
6133f1db9ecSBarry Smith   MatScalar      *ba = b->a, *aa = a->a;
614ace3abfcSBarry Smith   PetscBool       row_identity, col_identity;
6157fc0212eSBarry Smith 
6163a40ed3dSBarry Smith   PetscFunctionBegin;
6179566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
6189566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
6199566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
6207fc0212eSBarry Smith 
6217fc0212eSBarry Smith   for (i = 0; i < n; i++) {
6224078e994SBarry Smith     nz    = bi[i + 1] - bi[i];
6234078e994SBarry Smith     ajtmp = bj + bi[i];
6247fc0212eSBarry Smith     for (j = 0; j < nz; j++) rtmp[ajtmp[j]] = 0.0;
6257fc0212eSBarry Smith 
6267fc0212eSBarry Smith     /* load in initial (unfactored row) */
6274078e994SBarry Smith     nz       = ai[r[i] + 1] - ai[r[i]];
6284078e994SBarry Smith     ajtmpold = aj + ai[r[i]];
6294078e994SBarry Smith     v        = aa + ai[r[i]];
6307fc0212eSBarry Smith     for (j = 0; j < nz; j++) rtmp[ic[ajtmpold[j]]] = v[j];
6317fc0212eSBarry Smith 
6327fc0212eSBarry Smith     row = *ajtmp++;
6337fc0212eSBarry Smith     while (row < i) {
6347fc0212eSBarry Smith       pc = rtmp + row;
6357fc0212eSBarry Smith       if (*pc != 0.0) {
6364078e994SBarry Smith         pv         = ba + diag_offset[row];
6374078e994SBarry Smith         pj         = bj + diag_offset[row] + 1;
6387fc0212eSBarry Smith         multiplier = *pc * *pv++;
6397fc0212eSBarry Smith         *pc        = multiplier;
6404078e994SBarry Smith         nz         = bi[row + 1] - diag_offset[row] - 1;
6417fc0212eSBarry Smith         for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
6429566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(1.0 + 2.0 * nz));
6437fc0212eSBarry Smith       }
6447fc0212eSBarry Smith       row = *ajtmp++;
6457fc0212eSBarry Smith     }
6467fc0212eSBarry Smith     /* finished row so stick it into b->a */
6474078e994SBarry Smith     pv = ba + bi[i];
6484078e994SBarry Smith     pj = bj + bi[i];
6494078e994SBarry Smith     nz = bi[i + 1] - bi[i];
65026fbe8dcSKarl Rupp     for (j = 0; j < nz; j++) pv[j] = rtmp[pj[j]];
6514078e994SBarry Smith     diag = diag_offset[i] - bi[i];
6527fc0212eSBarry Smith     /* check pivot entry for current row */
65308401ef6SPierre Jolivet     PetscCheck(pv[diag] != 0.0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
6547fc0212eSBarry Smith     pv[diag] = 1.0 / pv[diag];
6557fc0212eSBarry Smith   }
6567fc0212eSBarry Smith 
6579566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
6589566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
6599566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
6609566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
6619566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
662f58c8c74SBarry Smith   if (row_identity && col_identity) {
66306e38f1dSHong Zhang     C->ops->solve          = MatSolve_SeqBAIJ_1_NaturalOrdering_inplace;
66406e38f1dSHong Zhang     C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering_inplace;
665f58c8c74SBarry Smith   } else {
66606e38f1dSHong Zhang     C->ops->solve          = MatSolve_SeqBAIJ_1_inplace;
66706e38f1dSHong Zhang     C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_inplace;
668f58c8c74SBarry Smith   }
6697fc0212eSBarry Smith   C->assembled = PETSC_TRUE;
6709566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
6713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6727fc0212eSBarry Smith }
6737fc0212eSBarry Smith 
674d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
675d71ae5a4SJacob Faibussowitsch {
6764ac6704cSBarry Smith   PetscFunctionBegin;
6774ac6704cSBarry Smith   *type = MATSOLVERPETSC;
6783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6794ac6704cSBarry Smith }
6804ac6704cSBarry Smith 
681d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A, MatFactorType ftype, Mat *B)
682d71ae5a4SJacob Faibussowitsch {
683d0f46423SBarry Smith   PetscInt n = A->rmap->n;
684b24902e0SBarry Smith 
685b24902e0SBarry Smith   PetscFunctionBegin;
686b499a2aeSHong Zhang #if defined(PETSC_USE_COMPLEX)
687b94d7dedSBarry Smith   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || !(ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian Factor is not supported");
688b499a2aeSHong Zhang #endif
6899566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
6909566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, n, n, n, n));
691c8342467SHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
6929566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQBAIJ));
69326fbe8dcSKarl Rupp 
6944dd39f65SShri Abhyankar     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqBAIJ;
6954dd39f65SShri Abhyankar     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqBAIJ;
6969566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
6979566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
6989566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
699b24902e0SBarry Smith   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
7009566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQSBAIJ));
7019566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*B, A->rmap->bs, MAT_SKIP_ALLOCATION, NULL));
70226fbe8dcSKarl Rupp 
7035c9eb25fSBarry Smith     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqBAIJ;
7045c9eb25fSBarry Smith     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqBAIJ;
7054ac6704cSBarry Smith     /*  Future optimization would be direct symbolic and numerical factorization for BAIJ to support orderings and Cholesky, instead of first converting to SBAIJ */
7069566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
7079566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
708e32f2f54SBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
709d5f3da31SBarry Smith   (*B)->factortype     = ftype;
710f73b0415SBarry Smith   (*B)->canuseordering = PETSC_TRUE;
71100c67f3bSHong Zhang 
7129566063dSJacob Faibussowitsch   PetscCall(PetscFree((*B)->solvertype));
7139566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
7149566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
7153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
716b24902e0SBarry Smith }
717273d9f13SBarry Smith 
718d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactor_SeqBAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
719d71ae5a4SJacob Faibussowitsch {
7205d517e7eSBarry Smith   Mat C;
7215d517e7eSBarry Smith 
7223a40ed3dSBarry Smith   PetscFunctionBegin;
7239566063dSJacob Faibussowitsch   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
7249566063dSJacob Faibussowitsch   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
7259566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(C, A, info));
72626fbe8dcSKarl Rupp 
727db4efbfdSBarry Smith   A->ops->solve          = C->ops->solve;
728db4efbfdSBarry Smith   A->ops->solvetranspose = C->ops->solvetranspose;
72926fbe8dcSKarl Rupp 
7309566063dSJacob Faibussowitsch   PetscCall(MatHeaderMerge(A, &C));
7313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7325d517e7eSBarry Smith }
7334cd38560SBarry Smith 
734c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
735d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
736d71ae5a4SJacob Faibussowitsch {
73778910aadSHong Zhang   Mat_SeqBAIJ    *a  = (Mat_SeqBAIJ *)A->data;
73878910aadSHong Zhang   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
73978910aadSHong Zhang   IS              ip = b->row;
7405d0c19d7SBarry Smith   const PetscInt *rip;
7415d0c19d7SBarry Smith   PetscInt        i, j, mbs = a->mbs, bs = A->rmap->bs, *bi = b->i, *bj = b->j, *bcol;
74278910aadSHong Zhang   PetscInt       *ai = a->i, *aj = a->j;
74378910aadSHong Zhang   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
74478910aadSHong Zhang   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
74507b50cabSHong Zhang   PetscReal       rs;
7460e95ead3SHong Zhang   FactorShiftCtx  sctx;
74778910aadSHong Zhang 
748c05c3958SHong Zhang   PetscFunctionBegin;
74907b50cabSHong Zhang   if (bs > 1) { /* convert A to a SBAIJ matrix and apply Cholesky factorization from it */
75048a46eb9SPierre Jolivet     if (!a->sbaijMat) PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INITIAL_MATRIX, &a->sbaijMat));
7519566063dSJacob Faibussowitsch     PetscCall((a->sbaijMat)->ops->choleskyfactornumeric(C, a->sbaijMat, info));
7529566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&a->sbaijMat));
7533ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
7546ad2eaddSHong Zhang   }
75578910aadSHong Zhang 
75607b50cabSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
7579566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
7583cea4cbeSHong Zhang 
7599566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
7609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
76178910aadSHong Zhang 
76275567043SBarry Smith   sctx.shift_amount = 0.;
7633cea4cbeSHong Zhang   sctx.nshift       = 0;
76478910aadSHong Zhang   do {
76507b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
76678910aadSHong Zhang     for (i = 0; i < mbs; i++) {
7679371c9d4SSatish Balay       rtmp[i] = 0.0;
7689371c9d4SSatish Balay       jl[i]   = mbs;
7699371c9d4SSatish Balay       il[0]   = 0;
77078910aadSHong Zhang     }
77178910aadSHong Zhang 
77278910aadSHong Zhang     for (k = 0; k < mbs; k++) {
77378910aadSHong Zhang       bval = ba + bi[k];
77478910aadSHong Zhang       /* initialize k-th row by the perm[k]-th row of A */
7759371c9d4SSatish Balay       jmin = ai[rip[k]];
7769371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
77778910aadSHong Zhang       for (j = jmin; j < jmax; j++) {
77878910aadSHong Zhang         col = rip[aj[j]];
77978910aadSHong Zhang         if (col >= k) { /* only take upper triangular entry */
78078910aadSHong Zhang           rtmp[col] = aa[j];
78178910aadSHong Zhang           *bval++   = 0.0; /* for in-place factorization */
78278910aadSHong Zhang         }
78378910aadSHong Zhang       }
7843cea4cbeSHong Zhang 
7853cea4cbeSHong Zhang       /* shift the diagonal of the matrix */
7863cea4cbeSHong Zhang       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
78778910aadSHong Zhang 
78878910aadSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
78978910aadSHong Zhang       dk = rtmp[k];
79078910aadSHong Zhang       i  = jl[k]; /* first row to be added to k_th row  */
79178910aadSHong Zhang 
79278910aadSHong Zhang       while (i < k) {
79378910aadSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
79478910aadSHong Zhang 
79578910aadSHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
79678910aadSHong Zhang         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
79778910aadSHong Zhang         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
79878910aadSHong Zhang         dk += uikdi * ba[ili];
79978910aadSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
80078910aadSHong Zhang 
80178910aadSHong Zhang         /* add multiple of row i to k-th row */
8029371c9d4SSatish Balay         jmin = ili + 1;
8039371c9d4SSatish Balay         jmax = bi[i + 1];
80478910aadSHong Zhang         if (jmin < jmax) {
80578910aadSHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
80678910aadSHong Zhang           /* update il and jl for row i */
80778910aadSHong Zhang           il[i] = jmin;
8089371c9d4SSatish Balay           j     = bj[jmin];
8099371c9d4SSatish Balay           jl[i] = jl[j];
8109371c9d4SSatish Balay           jl[j] = i;
81178910aadSHong Zhang         }
81278910aadSHong Zhang         i = nexti;
81378910aadSHong Zhang       }
81478910aadSHong Zhang 
8153cea4cbeSHong Zhang       /* shift the diagonals when zero pivot is detected */
8163cea4cbeSHong Zhang       /* compute rs=sum of abs(off-diagonal) */
8173cea4cbeSHong Zhang       rs   = 0.0;
81878910aadSHong Zhang       jmin = bi[k] + 1;
81978910aadSHong Zhang       nz   = bi[k + 1] - jmin;
82078910aadSHong Zhang       if (nz) {
82178910aadSHong Zhang         bcol = bj + jmin;
82278910aadSHong Zhang         while (nz--) {
82389f845c8SHong Zhang           rs += PetscAbsScalar(rtmp[*bcol]);
82489f845c8SHong Zhang           bcol++;
82578910aadSHong Zhang         }
82678910aadSHong Zhang       }
8273cea4cbeSHong Zhang 
8283cea4cbeSHong Zhang       sctx.rs = rs;
8293cea4cbeSHong Zhang       sctx.pv = dk;
8309566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
83107b50cabSHong Zhang       if (sctx.newshift) break;
8320e95ead3SHong Zhang       dk = sctx.pv;
83378910aadSHong Zhang 
83478910aadSHong Zhang       /* copy data into U(k,:) */
83578910aadSHong Zhang       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
8369371c9d4SSatish Balay       jmin      = bi[k] + 1;
8379371c9d4SSatish Balay       jmax      = bi[k + 1];
83878910aadSHong Zhang       if (jmin < jmax) {
83978910aadSHong Zhang         for (j = jmin; j < jmax; j++) {
8409371c9d4SSatish Balay           col       = bj[j];
8419371c9d4SSatish Balay           ba[j]     = rtmp[col];
8429371c9d4SSatish Balay           rtmp[col] = 0.0;
84378910aadSHong Zhang         }
84478910aadSHong Zhang         /* add the k-th row into il and jl */
84578910aadSHong Zhang         il[k] = jmin;
8469371c9d4SSatish Balay         i     = bj[jmin];
8479371c9d4SSatish Balay         jl[k] = jl[i];
8489371c9d4SSatish Balay         jl[i] = k;
84978910aadSHong Zhang       }
85078910aadSHong Zhang     }
85107b50cabSHong Zhang   } while (sctx.newshift);
8529566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, jl));
85378910aadSHong Zhang 
8549566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
85526fbe8dcSKarl Rupp 
85678910aadSHong Zhang   C->assembled    = PETSC_TRUE;
85778910aadSHong Zhang   C->preallocated = PETSC_TRUE;
85826fbe8dcSKarl Rupp 
8599566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->N));
8603cea4cbeSHong Zhang   if (sctx.nshift) {
861f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
8629566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
863f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
8649566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
86578910aadSHong Zhang     }
86678910aadSHong Zhang   }
8673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
868c05c3958SHong Zhang }
8694cd38560SBarry Smith 
870d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
871d71ae5a4SJacob Faibussowitsch {
87278910aadSHong Zhang   Mat_SeqBAIJ   *a = (Mat_SeqBAIJ *)A->data;
87378910aadSHong Zhang   Mat_SeqSBAIJ  *b = (Mat_SeqSBAIJ *)C->data;
8743cea4cbeSHong Zhang   PetscInt       i, j, am = a->mbs;
87578910aadSHong Zhang   PetscInt      *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
8763cea4cbeSHong Zhang   PetscInt       k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
87778910aadSHong Zhang   MatScalar     *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
8780e95ead3SHong Zhang   PetscReal      rs;
8790e95ead3SHong Zhang   FactorShiftCtx sctx;
88078910aadSHong Zhang 
881c05c3958SHong Zhang   PetscFunctionBegin;
8820e95ead3SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
8839566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
8843cea4cbeSHong Zhang 
8859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(am, &rtmp, am, &il, am, &jl));
88678910aadSHong Zhang 
88778910aadSHong Zhang   do {
88807b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
88978910aadSHong Zhang     for (i = 0; i < am; i++) {
8909371c9d4SSatish Balay       rtmp[i] = 0.0;
8919371c9d4SSatish Balay       jl[i]   = am;
8929371c9d4SSatish Balay       il[0]   = 0;
89378910aadSHong Zhang     }
89478910aadSHong Zhang 
89578910aadSHong Zhang     for (k = 0; k < am; k++) {
89678910aadSHong Zhang       /* initialize k-th row with elements nonzero in row perm(k) of A */
89778910aadSHong Zhang       nz   = ai[k + 1] - ai[k];
89878910aadSHong Zhang       acol = aj + ai[k];
89978910aadSHong Zhang       aval = aa + ai[k];
90078910aadSHong Zhang       bval = ba + bi[k];
90178910aadSHong Zhang       while (nz--) {
90278910aadSHong Zhang         if (*acol < k) { /* skip lower triangular entries */
9039371c9d4SSatish Balay           acol++;
9049371c9d4SSatish Balay           aval++;
90578910aadSHong Zhang         } else {
90678910aadSHong Zhang           rtmp[*acol++] = *aval++;
90778910aadSHong Zhang           *bval++       = 0.0; /* for in-place factorization */
90878910aadSHong Zhang         }
90978910aadSHong Zhang       }
9103cea4cbeSHong Zhang 
9113cea4cbeSHong Zhang       /* shift the diagonal of the matrix */
9123cea4cbeSHong Zhang       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
91378910aadSHong Zhang 
91478910aadSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
91578910aadSHong Zhang       dk = rtmp[k];
91678910aadSHong Zhang       i  = jl[k]; /* first row to be added to k_th row  */
91778910aadSHong Zhang 
91878910aadSHong Zhang       while (i < k) {
91978910aadSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
92078910aadSHong Zhang         /* compute multiplier, update D(k) and U(i,k) */
92178910aadSHong Zhang         ili   = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
92278910aadSHong Zhang         uikdi = -ba[ili] * ba[bi[i]];
92378910aadSHong Zhang         dk += uikdi * ba[ili];
92478910aadSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
92578910aadSHong Zhang 
92678910aadSHong Zhang         /* add multiple of row i to k-th row ... */
92778910aadSHong Zhang         jmin = ili + 1;
92878910aadSHong Zhang         nz   = bi[i + 1] - jmin;
92978910aadSHong Zhang         if (nz > 0) {
93078910aadSHong Zhang           bcol = bj + jmin;
93178910aadSHong Zhang           bval = ba + jmin;
93278910aadSHong Zhang           while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
93378910aadSHong Zhang           /* update il and jl for i-th row */
93478910aadSHong Zhang           il[i] = jmin;
9359371c9d4SSatish Balay           j     = bj[jmin];
9369371c9d4SSatish Balay           jl[i] = jl[j];
9379371c9d4SSatish Balay           jl[j] = i;
93878910aadSHong Zhang         }
93978910aadSHong Zhang         i = nexti;
94078910aadSHong Zhang       }
94178910aadSHong Zhang 
9423cea4cbeSHong Zhang       /* shift the diagonals when zero pivot is detected */
9433cea4cbeSHong Zhang       /* compute rs=sum of abs(off-diagonal) */
9443cea4cbeSHong Zhang       rs   = 0.0;
94578910aadSHong Zhang       jmin = bi[k] + 1;
94678910aadSHong Zhang       nz   = bi[k + 1] - jmin;
94778910aadSHong Zhang       if (nz) {
94878910aadSHong Zhang         bcol = bj + jmin;
94978910aadSHong Zhang         while (nz--) {
95089f845c8SHong Zhang           rs += PetscAbsScalar(rtmp[*bcol]);
95189f845c8SHong Zhang           bcol++;
95278910aadSHong Zhang         }
95378910aadSHong Zhang       }
9543cea4cbeSHong Zhang 
9553cea4cbeSHong Zhang       sctx.rs = rs;
9563cea4cbeSHong Zhang       sctx.pv = dk;
9579566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
95807b50cabSHong Zhang       if (sctx.newshift) break; /* sctx.shift_amount is updated */
9590e95ead3SHong Zhang       dk = sctx.pv;
96078910aadSHong Zhang 
96178910aadSHong Zhang       /* copy data into U(k,:) */
96278910aadSHong Zhang       ba[bi[k]] = 1.0 / dk;
96378910aadSHong Zhang       jmin      = bi[k] + 1;
96478910aadSHong Zhang       nz        = bi[k + 1] - jmin;
96578910aadSHong Zhang       if (nz) {
96678910aadSHong Zhang         bcol = bj + jmin;
96778910aadSHong Zhang         bval = ba + jmin;
96878910aadSHong Zhang         while (nz--) {
96978910aadSHong Zhang           *bval++       = rtmp[*bcol];
97078910aadSHong Zhang           rtmp[*bcol++] = 0.0;
97178910aadSHong Zhang         }
97278910aadSHong Zhang         /* add k-th row into il and jl */
97378910aadSHong Zhang         il[k] = jmin;
9749371c9d4SSatish Balay         i     = bj[jmin];
9759371c9d4SSatish Balay         jl[k] = jl[i];
9769371c9d4SSatish Balay         jl[i] = k;
97778910aadSHong Zhang       }
97878910aadSHong Zhang     }
97907b50cabSHong Zhang   } while (sctx.newshift);
9809566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, jl));
98178910aadSHong Zhang 
9820a3c351aSHong Zhang   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
9830a3c351aSHong Zhang   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
98478910aadSHong Zhang   C->assembled           = PETSC_TRUE;
98578910aadSHong Zhang   C->preallocated        = PETSC_TRUE;
98626fbe8dcSKarl Rupp 
9879566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->N));
9883cea4cbeSHong Zhang   if (sctx.nshift) {
989f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
9909566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
991f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
9929566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
99378910aadSHong Zhang     }
99478910aadSHong Zhang   }
9953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
996c05c3958SHong Zhang }
997c05c3958SHong Zhang 
998c6db04a5SJed Brown #include <petscbt.h>
999c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
1000d71ae5a4SJacob Faibussowitsch PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
1001d71ae5a4SJacob Faibussowitsch {
100278910aadSHong Zhang   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
100378910aadSHong Zhang   Mat_SeqSBAIJ      *b;
100478910aadSHong Zhang   Mat                B;
100548dd3d27SHong Zhang   PetscBool          perm_identity, missing;
10065d0c19d7SBarry Smith   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = a->mbs, bs = A->rmap->bs, *ui;
10075d0c19d7SBarry Smith   const PetscInt    *rip;
100878910aadSHong Zhang   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
10090298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, ncols, ncols_upper, *cols, *cols_lvl, *uj, **uj_ptr, **uj_lvl_ptr;
101078910aadSHong Zhang   PetscReal          fill = info->fill, levels = info->levels;
10110298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
10120298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
101378910aadSHong Zhang   PetscBT            lnkbt;
101478910aadSHong Zhang 
1015c05c3958SHong Zhang   PetscFunctionBegin;
10169566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
101728b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
101848dd3d27SHong Zhang 
10196ad2eaddSHong Zhang   if (bs > 1) {
102048a46eb9SPierre Jolivet     if (!a->sbaijMat) PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INITIAL_MATRIX, &a->sbaijMat));
1021719d5645SBarry Smith     (fact)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */
102226fbe8dcSKarl Rupp 
10239566063dSJacob Faibussowitsch     PetscCall(MatICCFactorSymbolic(fact, a->sbaijMat, perm, info));
10243ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
10256ad2eaddSHong Zhang   }
10266ad2eaddSHong Zhang 
10279566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
10289566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
102978910aadSHong Zhang 
103078910aadSHong Zhang   /* special case that simply copies fill pattern */
103178910aadSHong Zhang   if (!levels && perm_identity) {
10329566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(am + 1, &ui));
103326fbe8dcSKarl Rupp     for (i = 0; i < am; i++) ui[i] = ai[i + 1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
1034719d5645SBarry Smith     B = fact;
10359566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(B, 1, 0, ui));
103678910aadSHong Zhang 
103778910aadSHong Zhang     b  = (Mat_SeqSBAIJ *)B->data;
103878910aadSHong Zhang     uj = b->j;
103978910aadSHong Zhang     for (i = 0; i < am; i++) {
104078910aadSHong Zhang       aj = a->j + a->diag[i];
104126fbe8dcSKarl Rupp       for (j = 0; j < ui[i]; j++) *uj++ = *aj++;
104278910aadSHong Zhang       b->ilen[i] = ui[i];
104378910aadSHong Zhang     }
10449566063dSJacob Faibussowitsch     PetscCall(PetscFree(ui));
104526fbe8dcSKarl Rupp 
1046d5f3da31SBarry Smith     B->factortype = MAT_FACTOR_NONE;
104726fbe8dcSKarl Rupp 
10489566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
10499566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1050d5f3da31SBarry Smith     B->factortype = MAT_FACTOR_ICC;
105178910aadSHong Zhang 
105278910aadSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
10533ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
105478910aadSHong Zhang   }
105578910aadSHong Zhang 
105678910aadSHong Zhang   /* initialization */
10579566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
1058e60cf9a0SBarry Smith   ui[0] = 0;
10599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * am + 1, &cols_lvl));
106078910aadSHong Zhang 
106178910aadSHong Zhang   /* jl: linked list for storing indices of the pivot rows
106278910aadSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
10639566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &il, am, &jl));
106478910aadSHong Zhang   for (i = 0; i < am; i++) {
10659371c9d4SSatish Balay     jl[i] = am;
10669371c9d4SSatish Balay     il[i] = 0;
106778910aadSHong Zhang   }
106878910aadSHong Zhang 
106978910aadSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
107078910aadSHong Zhang   nlnk = am + 1;
10719566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
107278910aadSHong Zhang 
107395b5ac22SHong Zhang   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
10749566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am] / 2, am / 2)), &free_space));
107526fbe8dcSKarl Rupp 
107678910aadSHong Zhang   current_space = free_space;
107726fbe8dcSKarl Rupp 
10789566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am] / 2, am / 2)), &free_space_lvl));
107978910aadSHong Zhang   current_space_lvl = free_space_lvl;
108078910aadSHong Zhang 
108178910aadSHong Zhang   for (k = 0; k < am; k++) { /* for each active row k */
108278910aadSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
108378910aadSHong Zhang     nzk         = 0;
108478910aadSHong Zhang     ncols       = ai[rip[k] + 1] - ai[rip[k]];
108578910aadSHong Zhang     ncols_upper = 0;
108678910aadSHong Zhang     cols        = cols_lvl + am;
108778910aadSHong Zhang     for (j = 0; j < ncols; j++) {
108878910aadSHong Zhang       i = rip[*(aj + ai[rip[k]] + j)];
108978910aadSHong Zhang       if (i >= k) { /* only take upper triangular entry */
109078910aadSHong Zhang         cols[ncols_upper]     = i;
109178910aadSHong Zhang         cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */
109278910aadSHong Zhang         ncols_upper++;
109378910aadSHong Zhang       }
109478910aadSHong Zhang     }
10959566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLAdd(ncols_upper, cols, levels, cols_lvl, am, &nlnk, lnk, lnk_lvl, lnkbt));
109678910aadSHong Zhang     nzk += nlnk;
109778910aadSHong Zhang 
109878910aadSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
109978910aadSHong Zhang     prow = jl[k]; /* 1st pivot row */
110078910aadSHong Zhang 
110178910aadSHong Zhang     while (prow < k) {
110278910aadSHong Zhang       nextprow = jl[prow];
110378910aadSHong Zhang 
110478910aadSHong Zhang       /* merge prow into k-th row */
110578910aadSHong Zhang       jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
110678910aadSHong Zhang       jmax  = ui[prow + 1];
110778910aadSHong Zhang       ncols = jmax - jmin;
110878910aadSHong Zhang       i     = jmin - ui[prow];
110978910aadSHong Zhang       cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
111078910aadSHong Zhang       for (j = 0; j < ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
11119566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLAddSorted(ncols, cols, levels, cols_lvl, am, &nlnk, lnk, lnk_lvl, lnkbt));
111278910aadSHong Zhang       nzk += nlnk;
111378910aadSHong Zhang 
111478910aadSHong Zhang       /* update il and jl for prow */
111578910aadSHong Zhang       if (jmin < jmax) {
111678910aadSHong Zhang         il[prow] = jmin;
111726fbe8dcSKarl Rupp 
11189371c9d4SSatish Balay         j        = *cols;
11199371c9d4SSatish Balay         jl[prow] = jl[j];
11209371c9d4SSatish Balay         jl[j]    = prow;
112178910aadSHong Zhang       }
112278910aadSHong Zhang       prow = nextprow;
112378910aadSHong Zhang     }
112478910aadSHong Zhang 
112578910aadSHong Zhang     /* if free space is not available, make more free space */
112678910aadSHong Zhang     if (current_space->local_remaining < nzk) {
112778910aadSHong Zhang       i = am - k + 1;                                                             /* num of unfactored rows */
1128f91af8c7SBarry Smith       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
11299566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
11309566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
113178910aadSHong Zhang       reallocs++;
113278910aadSHong Zhang     }
113378910aadSHong Zhang 
113478910aadSHong Zhang     /* copy data into free_space and free_space_lvl, then initialize lnk */
11359566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
113678910aadSHong Zhang 
113778910aadSHong Zhang     /* add the k-th row into il and jl */
113878910aadSHong Zhang     if (nzk - 1 > 0) {
113978910aadSHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
11409371c9d4SSatish Balay       jl[k] = jl[i];
11419371c9d4SSatish Balay       jl[i] = k;
114278910aadSHong Zhang       il[k] = ui[k] + 1;
114378910aadSHong Zhang     }
114478910aadSHong Zhang     uj_ptr[k]     = current_space->array;
114578910aadSHong Zhang     uj_lvl_ptr[k] = current_space_lvl->array;
114678910aadSHong Zhang 
114778910aadSHong Zhang     current_space->array += nzk;
114878910aadSHong Zhang     current_space->local_used += nzk;
114978910aadSHong Zhang     current_space->local_remaining -= nzk;
115078910aadSHong Zhang 
115178910aadSHong Zhang     current_space_lvl->array += nzk;
115278910aadSHong Zhang     current_space_lvl->local_used += nzk;
115378910aadSHong Zhang     current_space_lvl->local_remaining -= nzk;
115478910aadSHong Zhang 
115578910aadSHong Zhang     ui[k + 1] = ui[k] + nzk;
115678910aadSHong Zhang   }
115778910aadSHong Zhang 
11589566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
11599566063dSJacob Faibussowitsch   PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, il, jl));
11609566063dSJacob Faibussowitsch   PetscCall(PetscFree(cols_lvl));
116178910aadSHong Zhang 
11629263d837SHong Zhang   /* copy free_space into uj and free free_space; set uj in new datastructure; */
11639566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
11649566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
11659566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
11669566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
116778910aadSHong Zhang 
116878910aadSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
1169719d5645SBarry Smith   B = fact;
11709566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetPreallocation(B, 1, MAT_SKIP_ALLOCATION, NULL));
117178910aadSHong Zhang 
117278910aadSHong Zhang   b               = (Mat_SeqSBAIJ *)B->data;
117378910aadSHong Zhang   b->singlemalloc = PETSC_FALSE;
1174e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
1175e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
117626fbe8dcSKarl Rupp 
11779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
117826fbe8dcSKarl Rupp 
117978910aadSHong Zhang   b->j             = uj;
118078910aadSHong Zhang   b->i             = ui;
1181f4259b30SLisandro Dalcin   b->diag          = NULL;
1182f4259b30SLisandro Dalcin   b->ilen          = NULL;
1183f4259b30SLisandro Dalcin   b->imax          = NULL;
118478910aadSHong Zhang   b->row           = perm;
118578910aadSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
118626fbe8dcSKarl Rupp 
11879566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
118826fbe8dcSKarl Rupp 
118978910aadSHong Zhang   b->icol = perm;
119026fbe8dcSKarl Rupp 
11919566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
11929566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
119326fbe8dcSKarl Rupp 
119478910aadSHong Zhang   b->maxnz = b->nz = ui[am];
119578910aadSHong Zhang 
119678910aadSHong Zhang   B->info.factor_mallocs   = reallocs;
119778910aadSHong Zhang   B->info.fill_ratio_given = fill;
119875567043SBarry Smith   if (ai[am] != 0.) {
1199da81f932SPierre Jolivet     /* nonzeros in lower triangular part of A (including diagonals)= (ai[am]+am)/2 */
120095b5ac22SHong Zhang     B->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
120178910aadSHong Zhang   } else {
120278910aadSHong Zhang     B->info.fill_ratio_needed = 0.0;
120378910aadSHong Zhang   }
12049263d837SHong Zhang #if defined(PETSC_USE_INFO)
12059263d837SHong Zhang   if (ai[am] != 0) {
120695b5ac22SHong Zhang     PetscReal af = B->info.fill_ratio_needed;
12079566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
12089566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
12099566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
12109263d837SHong Zhang   } else {
12119566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
12129263d837SHong Zhang   }
12139263d837SHong Zhang #endif
121478910aadSHong Zhang   if (perm_identity) {
12150a3c351aSHong Zhang     B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
12160a3c351aSHong Zhang     B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
121778910aadSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
121878910aadSHong Zhang   } else {
1219719d5645SBarry Smith     (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
122078910aadSHong Zhang   }
12213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1222c05c3958SHong Zhang }
1223c05c3958SHong Zhang 
1224d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
1225d71ae5a4SJacob Faibussowitsch {
122678910aadSHong Zhang   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
122778910aadSHong Zhang   Mat_SeqSBAIJ      *b;
122878910aadSHong Zhang   Mat                B;
12299186b105SHong Zhang   PetscBool          perm_identity, missing;
123078910aadSHong Zhang   PetscReal          fill = info->fill;
12315d0c19d7SBarry Smith   const PetscInt    *rip;
12325d0c19d7SBarry Smith   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, *ai = a->i, *aj = a->j, reallocs = 0, prow;
123378910aadSHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
123478910aadSHong Zhang   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
12350298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
123678910aadSHong Zhang   PetscBT            lnkbt;
123778910aadSHong Zhang 
1238c05c3958SHong Zhang   PetscFunctionBegin;
12396ad2eaddSHong Zhang   if (bs > 1) { /* convert to seqsbaij */
124048a46eb9SPierre Jolivet     if (!a->sbaijMat) PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INITIAL_MATRIX, &a->sbaijMat));
1241719d5645SBarry Smith     (fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */
124226fbe8dcSKarl Rupp 
12439566063dSJacob Faibussowitsch     PetscCall(MatCholeskyFactorSymbolic(fact, a->sbaijMat, perm, info));
12443ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
12456ad2eaddSHong Zhang   }
12466ad2eaddSHong Zhang 
12479566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
124828b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
12499186b105SHong Zhang 
125078910aadSHong Zhang   /* check whether perm is the identity mapping */
12519566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
125228b400f6SJacob Faibussowitsch   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported");
12539566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
125478910aadSHong Zhang 
125578910aadSHong Zhang   /* initialization */
12569566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs + 1, &ui));
1257e60cf9a0SBarry Smith   ui[0] = 0;
125878910aadSHong Zhang 
125978910aadSHong Zhang   /* jl: linked list for storing indices of the pivot rows
126078910aadSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
12619566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
126278910aadSHong Zhang   for (i = 0; i < mbs; i++) {
12639371c9d4SSatish Balay     jl[i] = mbs;
12649371c9d4SSatish Balay     il[i] = 0;
126578910aadSHong Zhang   }
126678910aadSHong Zhang 
126778910aadSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
126878910aadSHong Zhang   nlnk = mbs + 1;
12699566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
127078910aadSHong Zhang 
12716a69fef8SHong Zhang   /* initial FreeSpace size is fill* (ai[mbs]+mbs)/2 */
12729566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[mbs] / 2, mbs / 2)), &free_space));
127326fbe8dcSKarl Rupp 
127478910aadSHong Zhang   current_space = free_space;
127578910aadSHong Zhang 
127678910aadSHong Zhang   for (k = 0; k < mbs; k++) { /* for each active row k */
127778910aadSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
127878910aadSHong Zhang     nzk         = 0;
127978910aadSHong Zhang     ncols       = ai[rip[k] + 1] - ai[rip[k]];
1280e60cf9a0SBarry Smith     ncols_upper = 0;
128178910aadSHong Zhang     for (j = 0; j < ncols; j++) {
128278910aadSHong Zhang       i = rip[*(aj + ai[rip[k]] + j)];
128378910aadSHong Zhang       if (i >= k) { /* only take upper triangular entry */
128478910aadSHong Zhang         cols[ncols_upper] = i;
128578910aadSHong Zhang         ncols_upper++;
128678910aadSHong Zhang       }
128778910aadSHong Zhang     }
12889566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols_upper, cols, mbs, &nlnk, lnk, lnkbt));
128978910aadSHong Zhang     nzk += nlnk;
129078910aadSHong Zhang 
129178910aadSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
129278910aadSHong Zhang     prow = jl[k]; /* 1st pivot row */
129378910aadSHong Zhang 
129478910aadSHong Zhang     while (prow < k) {
129578910aadSHong Zhang       nextprow = jl[prow];
129678910aadSHong Zhang       /* merge prow into k-th row */
129778910aadSHong Zhang       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
129878910aadSHong Zhang       jmax   = ui[prow + 1];
129978910aadSHong Zhang       ncols  = jmax - jmin;
130078910aadSHong Zhang       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
13019566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
130278910aadSHong Zhang       nzk += nlnk;
130378910aadSHong Zhang 
130478910aadSHong Zhang       /* update il and jl for prow */
130578910aadSHong Zhang       if (jmin < jmax) {
130678910aadSHong Zhang         il[prow] = jmin;
130726fbe8dcSKarl Rupp         j        = *uj_ptr;
130826fbe8dcSKarl Rupp         jl[prow] = jl[j];
130926fbe8dcSKarl Rupp         jl[j]    = prow;
131078910aadSHong Zhang       }
131178910aadSHong Zhang       prow = nextprow;
131278910aadSHong Zhang     }
131378910aadSHong Zhang 
131478910aadSHong Zhang     /* if free space is not available, make more free space */
131578910aadSHong Zhang     if (current_space->local_remaining < nzk) {
131678910aadSHong Zhang       i = mbs - k + 1;                                                            /* num of unfactored rows */
1317f91af8c7SBarry Smith       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
13189566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
131978910aadSHong Zhang       reallocs++;
132078910aadSHong Zhang     }
132178910aadSHong Zhang 
132278910aadSHong Zhang     /* copy data into free space, then initialize lnk */
13239566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
132478910aadSHong Zhang 
132578910aadSHong Zhang     /* add the k-th row into il and jl */
132678910aadSHong Zhang     if (nzk - 1 > 0) {
132778910aadSHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
13289371c9d4SSatish Balay       jl[k] = jl[i];
13299371c9d4SSatish Balay       jl[i] = k;
133078910aadSHong Zhang       il[k] = ui[k] + 1;
133178910aadSHong Zhang     }
133278910aadSHong Zhang     ui_ptr[k] = current_space->array;
133378910aadSHong Zhang     current_space->array += nzk;
133478910aadSHong Zhang     current_space->local_used += nzk;
133578910aadSHong Zhang     current_space->local_remaining -= nzk;
133678910aadSHong Zhang 
133778910aadSHong Zhang     ui[k + 1] = ui[k] + nzk;
133878910aadSHong Zhang   }
133978910aadSHong Zhang 
13409566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
13419566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, il, jl, cols));
134278910aadSHong Zhang 
13439263d837SHong Zhang   /* copy free_space into uj and free free_space; set uj in new datastructure; */
13449566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
13459566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
13469566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
134778910aadSHong Zhang 
134878910aadSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
1349719d5645SBarry Smith   B = fact;
13509566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
135178910aadSHong Zhang 
135278910aadSHong Zhang   b               = (Mat_SeqSBAIJ *)B->data;
135378910aadSHong Zhang   b->singlemalloc = PETSC_FALSE;
1354e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
1355e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
135626fbe8dcSKarl Rupp 
13579566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
135826fbe8dcSKarl Rupp 
135978910aadSHong Zhang   b->j             = uj;
136078910aadSHong Zhang   b->i             = ui;
1361f4259b30SLisandro Dalcin   b->diag          = NULL;
1362f4259b30SLisandro Dalcin   b->ilen          = NULL;
1363f4259b30SLisandro Dalcin   b->imax          = NULL;
136478910aadSHong Zhang   b->row           = perm;
136578910aadSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
136626fbe8dcSKarl Rupp 
13679566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
136878910aadSHong Zhang   b->icol = perm;
13699566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
13709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
137178910aadSHong Zhang   b->maxnz = b->nz = ui[mbs];
137278910aadSHong Zhang 
137378910aadSHong Zhang   B->info.factor_mallocs   = reallocs;
137478910aadSHong Zhang   B->info.fill_ratio_given = fill;
137575567043SBarry Smith   if (ai[mbs] != 0.) {
137695b5ac22SHong Zhang     /* nonzeros in lower triangular part of A = (ai[mbs]+mbs)/2 */
137795b5ac22SHong Zhang     B->info.fill_ratio_needed = ((PetscReal)2 * ui[mbs]) / (ai[mbs] + mbs);
137878910aadSHong Zhang   } else {
137978910aadSHong Zhang     B->info.fill_ratio_needed = 0.0;
138078910aadSHong Zhang   }
13819263d837SHong Zhang #if defined(PETSC_USE_INFO)
13829263d837SHong Zhang   if (ai[mbs] != 0.) {
13839263d837SHong Zhang     PetscReal af = B->info.fill_ratio_needed;
13849566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
13859566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
13869566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
13879263d837SHong Zhang   } else {
13889566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
13899263d837SHong Zhang   }
13909263d837SHong Zhang #endif
139178910aadSHong Zhang   if (perm_identity) {
13926ad2eaddSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
139378910aadSHong Zhang   } else {
13946ad2eaddSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
139578910aadSHong Zhang   }
13963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1397c05c3958SHong Zhang }
1398c8342467SHong Zhang 
1399d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering(Mat A, Vec bb, Vec xx)
1400d71ae5a4SJacob Faibussowitsch {
14011a83e813SShri Abhyankar   Mat_SeqBAIJ       *a  = (Mat_SeqBAIJ *)A->data;
14021a83e813SShri Abhyankar   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
14031a83e813SShri Abhyankar   PetscInt           i, k, n                       = a->mbs;
14041a83e813SShri Abhyankar   PetscInt           nz, bs = A->rmap->bs, bs2 = a->bs2;
1405d9ca1df4SBarry Smith   const MatScalar   *aa = a->a, *v;
1406d9ca1df4SBarry Smith   PetscScalar       *x, *s, *t, *ls;
1407d9ca1df4SBarry Smith   const PetscScalar *b;
14081a83e813SShri Abhyankar 
14091a83e813SShri Abhyankar   PetscFunctionBegin;
1410*ce6b3536SJed Brown   PetscCheck(bs > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected bs %" PetscInt_FMT " > 0", bs);
14119566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
14129566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
14131a83e813SShri Abhyankar   t = a->solve_work;
14141a83e813SShri Abhyankar 
14151a83e813SShri Abhyankar   /* forward solve the lower triangular */
14169566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(t, b, bs)); /* copy 1st block of b to t */
14171a83e813SShri Abhyankar 
14181a83e813SShri Abhyankar   for (i = 1; i < n; i++) {
14191a83e813SShri Abhyankar     v  = aa + bs2 * ai[i];
14201a83e813SShri Abhyankar     vi = aj + ai[i];
14211a83e813SShri Abhyankar     nz = ai[i + 1] - ai[i];
14221a83e813SShri Abhyankar     s  = t + bs * i;
14239566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(s, b + bs * i, bs)); /* copy i_th block of b to t */
14241a83e813SShri Abhyankar     for (k = 0; k < nz; k++) {
142596b95a6bSBarry Smith       PetscKernel_v_gets_v_minus_A_times_w(bs, s, v, t + bs * vi[k]);
14261a83e813SShri Abhyankar       v += bs2;
14271a83e813SShri Abhyankar     }
14281a83e813SShri Abhyankar   }
14291a83e813SShri Abhyankar 
14301a83e813SShri Abhyankar   /* backward solve the upper triangular */
14311a83e813SShri Abhyankar   ls = a->solve_work + A->cmap->n;
14321a83e813SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
14331a83e813SShri Abhyankar     v  = aa + bs2 * (adiag[i + 1] + 1);
14341a83e813SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
14351a83e813SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
14369566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ls, t + i * bs, bs));
14371a83e813SShri Abhyankar     for (k = 0; k < nz; k++) {
143896b95a6bSBarry Smith       PetscKernel_v_gets_v_minus_A_times_w(bs, ls, v, t + bs * vi[k]);
14391a83e813SShri Abhyankar       v += bs2;
14401a83e813SShri Abhyankar     }
144196b95a6bSBarry Smith     PetscKernel_w_gets_A_times_v(bs, ls, aa + bs2 * adiag[i], t + i * bs); /* *inv(diagonal[i]) */
14429566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(x + i * bs, t + i * bs, bs));
14431a83e813SShri Abhyankar   }
14441a83e813SShri Abhyankar 
14459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
14469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
14479566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * (a->bs2) * (a->nz) - A->rmap->bs * A->cmap->n));
14483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14491a83e813SShri Abhyankar }
14501a83e813SShri Abhyankar 
1451d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_N(Mat A, Vec bb, Vec xx)
1452d71ae5a4SJacob Faibussowitsch {
145335aa4fcfSShri Abhyankar   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
145435aa4fcfSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
145535aa4fcfSShri Abhyankar   const PetscInt    *r, *c, *rout, *cout, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
145635aa4fcfSShri Abhyankar   PetscInt           i, m, n = a->mbs;
145735aa4fcfSShri Abhyankar   PetscInt           nz, bs = A->rmap->bs, bs2 = a->bs2;
1458d9ca1df4SBarry Smith   const MatScalar   *aa = a->a, *v;
1459d9ca1df4SBarry Smith   PetscScalar       *x, *s, *t, *ls;
1460d9ca1df4SBarry Smith   const PetscScalar *b;
146135aa4fcfSShri Abhyankar 
146235aa4fcfSShri Abhyankar   PetscFunctionBegin;
1463*ce6b3536SJed Brown   PetscCheck(bs > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected bs %" PetscInt_FMT " > 0", bs);
14649566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
14659566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
146635aa4fcfSShri Abhyankar   t = a->solve_work;
146735aa4fcfSShri Abhyankar 
14689371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
14699371c9d4SSatish Balay   r = rout;
14709371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
14719371c9d4SSatish Balay   c = cout;
147235aa4fcfSShri Abhyankar 
147335aa4fcfSShri Abhyankar   /* forward solve the lower triangular */
14749566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(t, b + bs * r[0], bs));
147535aa4fcfSShri Abhyankar   for (i = 1; i < n; i++) {
147635aa4fcfSShri Abhyankar     v  = aa + bs2 * ai[i];
147735aa4fcfSShri Abhyankar     vi = aj + ai[i];
147835aa4fcfSShri Abhyankar     nz = ai[i + 1] - ai[i];
147935aa4fcfSShri Abhyankar     s  = t + bs * i;
14809566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(s, b + bs * r[i], bs));
148135aa4fcfSShri Abhyankar     for (m = 0; m < nz; m++) {
148296b95a6bSBarry Smith       PetscKernel_v_gets_v_minus_A_times_w(bs, s, v, t + bs * vi[m]);
148335aa4fcfSShri Abhyankar       v += bs2;
148435aa4fcfSShri Abhyankar     }
148535aa4fcfSShri Abhyankar   }
148635aa4fcfSShri Abhyankar 
148735aa4fcfSShri Abhyankar   /* backward solve the upper triangular */
148835aa4fcfSShri Abhyankar   ls = a->solve_work + A->cmap->n;
148935aa4fcfSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
149035aa4fcfSShri Abhyankar     v  = aa + bs2 * (adiag[i + 1] + 1);
149135aa4fcfSShri Abhyankar     vi = aj + adiag[i + 1] + 1;
149235aa4fcfSShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
14939566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ls, t + i * bs, bs));
149435aa4fcfSShri Abhyankar     for (m = 0; m < nz; m++) {
149596b95a6bSBarry Smith       PetscKernel_v_gets_v_minus_A_times_w(bs, ls, v, t + bs * vi[m]);
149635aa4fcfSShri Abhyankar       v += bs2;
149735aa4fcfSShri Abhyankar     }
149896b95a6bSBarry Smith     PetscKernel_w_gets_A_times_v(bs, ls, v, t + i * bs); /* *inv(diagonal[i]) */
14999566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(x + bs * c[i], t + i * bs, bs));
150035aa4fcfSShri Abhyankar   }
15019566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
15029566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
15039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
15049566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
15059566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * (a->bs2) * (a->nz) - A->rmap->bs * A->cmap->n));
15063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
150735aa4fcfSShri Abhyankar }
150835aa4fcfSShri Abhyankar 
1509a25532f0SBarry Smith /*
1510a25532f0SBarry Smith     For each block in an block array saves the largest absolute value in the block into another array
1511a25532f0SBarry Smith */
1512d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatBlockAbs_private(PetscInt nbs, PetscInt bs2, PetscScalar *blockarray, PetscReal *absarray)
1513d71ae5a4SJacob Faibussowitsch {
15142efa7f71SHong Zhang   PetscInt i, j;
15155fd66863SKarl Rupp 
15162efa7f71SHong Zhang   PetscFunctionBegin;
15179566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(absarray, nbs + 1));
15182efa7f71SHong Zhang   for (i = 0; i < nbs; i++) {
15192efa7f71SHong Zhang     for (j = 0; j < bs2; j++) {
15202efa7f71SHong Zhang       if (absarray[i] < PetscAbsScalar(blockarray[i * nbs + j])) absarray[i] = PetscAbsScalar(blockarray[i * nbs + j]);
15212efa7f71SHong Zhang     }
15222efa7f71SHong Zhang   }
15233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15242efa7f71SHong Zhang }
15252efa7f71SHong Zhang 
1526fe97e370SBarry Smith /*
1527fe97e370SBarry Smith      This needs to be renamed and called by the regular MatILUFactor_SeqBAIJ when drop tolerance is used
1528fe97e370SBarry Smith */
1529d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
1530d71ae5a4SJacob Faibussowitsch {
15312efa7f71SHong Zhang   Mat             B = *fact;
15322efa7f71SHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data, *b;
15332efa7f71SHong Zhang   IS              isicol;
15342efa7f71SHong Zhang   const PetscInt *r, *ic;
15352efa7f71SHong Zhang   PetscInt        i, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
15362efa7f71SHong Zhang   PetscInt       *bi, *bj, *bdiag;
15372efa7f71SHong Zhang 
15382efa7f71SHong Zhang   PetscInt   row, nzi, nzi_bl, nzi_bu, *im, dtcount, nzi_al, nzi_au;
15392efa7f71SHong Zhang   PetscInt   nlnk, *lnk;
15402efa7f71SHong Zhang   PetscBT    lnkbt;
1541ace3abfcSBarry Smith   PetscBool  row_identity, icol_identity;
15422efa7f71SHong Zhang   MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, *multiplier, *vtmp;
15432efa7f71SHong Zhang   PetscInt   j, nz, *pj, *bjtmp, k, ncut, *jtmp;
15442efa7f71SHong Zhang 
1545182b8fbaSHong Zhang   PetscReal  dt = info->dt; /* shift=info->shiftamount; */
15462efa7f71SHong Zhang   PetscInt   nnz_max;
1547ace3abfcSBarry Smith   PetscBool  missing;
1548021822bcSHong Zhang   PetscReal *vtmp_abs;
154997ef94ebSSatish Balay   MatScalar *v_work;
155097ef94ebSSatish Balay   PetscInt  *v_pivots;
15515f8bbccaSHong Zhang   PetscBool  allowzeropivot, zeropivotdetected = PETSC_FALSE;
15522efa7f71SHong Zhang 
1553c8342467SHong Zhang   PetscFunctionBegin;
15542ef1f0ffSBarry Smith   /* symbolic factorization, can be reused */
15559566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
155628b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
15572efa7f71SHong Zhang   adiag = a->diag;
15582efa7f71SHong Zhang 
15599566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
15602efa7f71SHong Zhang 
15612efa7f71SHong Zhang   /* bdiag is location of diagonal in factor */
15629566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs + 1, &bdiag));
15632efa7f71SHong Zhang 
15642efa7f71SHong Zhang   /* allocate row pointers bi */
15659566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * mbs + 2, &bi));
15662efa7f71SHong Zhang 
15672efa7f71SHong Zhang   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
15682efa7f71SHong Zhang   dtcount = (PetscInt)info->dtcount;
15696bce7ff8SHong Zhang   if (dtcount > mbs - 1) dtcount = mbs - 1;
15706bce7ff8SHong Zhang   nnz_max = ai[mbs] + 2 * mbs * dtcount + 2;
15716da515a1SHong Zhang   /* printf("MatILUDTFactor_SeqBAIJ, bs %d, ai[mbs] %d, nnz_max  %d, dtcount %d\n",bs,ai[mbs],nnz_max,dtcount); */
15729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nnz_max, &bj));
15736bce7ff8SHong Zhang   nnz_max = nnz_max * bs2;
15749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nnz_max, &ba));
15752efa7f71SHong Zhang 
15762efa7f71SHong Zhang   /* put together the new matrix */
15779566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
157826fbe8dcSKarl Rupp 
15792efa7f71SHong Zhang   b               = (Mat_SeqBAIJ *)(B)->data;
15802efa7f71SHong Zhang   b->free_a       = PETSC_TRUE;
15812efa7f71SHong Zhang   b->free_ij      = PETSC_TRUE;
15822efa7f71SHong Zhang   b->singlemalloc = PETSC_FALSE;
158326fbe8dcSKarl Rupp 
15842efa7f71SHong Zhang   b->a    = ba;
15852efa7f71SHong Zhang   b->j    = bj;
15862efa7f71SHong Zhang   b->i    = bi;
15872efa7f71SHong Zhang   b->diag = bdiag;
1588f4259b30SLisandro Dalcin   b->ilen = NULL;
1589f4259b30SLisandro Dalcin   b->imax = NULL;
15902efa7f71SHong Zhang   b->row  = isrow;
15912efa7f71SHong Zhang   b->col  = iscol;
159226fbe8dcSKarl Rupp 
15939566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
15949566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
159526fbe8dcSKarl Rupp 
15962efa7f71SHong Zhang   b->icol = isicol;
15979566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bs * (mbs + 1), &b->solve_work));
15982efa7f71SHong Zhang   b->maxnz = nnz_max / bs2;
15992efa7f71SHong Zhang 
1600d5f3da31SBarry Smith   (B)->factortype            = MAT_FACTOR_ILUDT;
16012efa7f71SHong Zhang   (B)->info.factor_mallocs   = 0;
16022efa7f71SHong Zhang   (B)->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)(ai[mbs] * bs2));
16032ef1f0ffSBarry Smith   /* end of symbolic factorization */
16049566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
16059566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
16062efa7f71SHong Zhang 
16072efa7f71SHong Zhang   /* linked list for storing column indices of the active row */
16082efa7f71SHong Zhang   nlnk = mbs + 1;
16099566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
16102efa7f71SHong Zhang 
16112efa7f71SHong Zhang   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
16129566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(mbs, &im, mbs, &jtmp));
16132efa7f71SHong Zhang   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
16149566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(mbs * bs2, &rtmp, mbs * bs2, &vtmp));
16159566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs + 1, &vtmp_abs));
16169566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(bs, &v_work, bs2, &multiplier, bs, &v_pivots));
16172efa7f71SHong Zhang 
16185f8bbccaSHong Zhang   allowzeropivot  = PetscNot(A->erroriffailure);
1619e60cf9a0SBarry Smith   bi[0]           = 0;
16202efa7f71SHong Zhang   bdiag[0]        = (nnz_max / bs2) - 1; /* location of diagonal in factor B */
16216bce7ff8SHong Zhang   bi[2 * mbs + 1] = bdiag[0] + 1;        /* endof bj and ba array */
16222efa7f71SHong Zhang   for (i = 0; i < mbs; i++) {
16232efa7f71SHong Zhang     /* copy initial fill into linked list */
16242efa7f71SHong Zhang     nzi = ai[r[i] + 1] - ai[r[i]];
162528b400f6SJacob Faibussowitsch     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
16262efa7f71SHong Zhang     nzi_al = adiag[r[i]] - ai[r[i]];
16272efa7f71SHong Zhang     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
16282efa7f71SHong Zhang 
16292efa7f71SHong Zhang     /* load in initial unfactored row */
16302efa7f71SHong Zhang     ajtmp = aj + ai[r[i]];
16319566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, mbs, &nlnk, lnk, lnkbt));
16329566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(rtmp, mbs * bs2));
16332efa7f71SHong Zhang     aatmp = a->a + bs2 * ai[r[i]];
16349566063dSJacob Faibussowitsch     for (j = 0; j < nzi; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ic[ajtmp[j]], aatmp + bs2 * j, bs2));
16352efa7f71SHong Zhang 
16362efa7f71SHong Zhang     /* add pivot rows into linked list */
16372efa7f71SHong Zhang     row = lnk[mbs];
16382efa7f71SHong Zhang     while (row < i) {
16392efa7f71SHong Zhang       nzi_bl = bi[row + 1] - bi[row] + 1;
16402efa7f71SHong Zhang       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
16419566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
16422efa7f71SHong Zhang       nzi += nlnk;
16432efa7f71SHong Zhang       row = lnk[row];
16442efa7f71SHong Zhang     }
16452efa7f71SHong Zhang 
16462efa7f71SHong Zhang     /* copy data from lnk into jtmp, then initialize lnk */
16479566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(mbs, mbs, nzi, lnk, jtmp, lnkbt));
16482efa7f71SHong Zhang 
16492efa7f71SHong Zhang     /* numerical factorization */
16502efa7f71SHong Zhang     bjtmp = jtmp;
16512efa7f71SHong Zhang     row   = *bjtmp++; /* 1st pivot row */
16522efa7f71SHong Zhang 
16532efa7f71SHong Zhang     while (row < i) {
16542efa7f71SHong Zhang       pc = rtmp + bs2 * row;
16552efa7f71SHong Zhang       pv = ba + bs2 * bdiag[row];                           /* inv(diag) of the pivot row */
165696b95a6bSBarry Smith       PetscKernel_A_gets_A_times_B(bs, pc, pv, multiplier); /* pc= multiplier = pc*inv(diag[row]) */
16579566063dSJacob Faibussowitsch       PetscCall(MatBlockAbs_private(1, bs2, pc, vtmp_abs));
16582efa7f71SHong Zhang       if (vtmp_abs[0] > dt) {         /* apply tolerance dropping rule */
16592efa7f71SHong Zhang         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
16602efa7f71SHong Zhang         pv = ba + bs2 * (bdiag[row + 1] + 1);
16612efa7f71SHong Zhang         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
1662ad540459SPierre Jolivet         for (j = 0; j < nz; j++) PetscKernel_A_gets_A_minus_B_times_C(bs, rtmp + bs2 * pj[j], pc, pv + bs2 * j);
16639566063dSJacob Faibussowitsch         /* PetscCall(PetscLogFlops(bslog*(nz+1.0)-bs)); */
16642efa7f71SHong Zhang       }
16652efa7f71SHong Zhang       row = *bjtmp++;
16662efa7f71SHong Zhang     }
16672efa7f71SHong Zhang 
16682efa7f71SHong Zhang     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
16699371c9d4SSatish Balay     nzi_bl = 0;
16709371c9d4SSatish Balay     j      = 0;
16712efa7f71SHong Zhang     while (jtmp[j] < i) { /* L-part. Note: jtmp is sorted */
16729566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(vtmp + bs2 * j, rtmp + bs2 * jtmp[j], bs2));
16739371c9d4SSatish Balay       nzi_bl++;
16749371c9d4SSatish Balay       j++;
16752efa7f71SHong Zhang     }
16762efa7f71SHong Zhang     nzi_bu = nzi - nzi_bl - 1;
16772efa7f71SHong Zhang 
16782efa7f71SHong Zhang     while (j < nzi) { /* U-part */
16799566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(vtmp + bs2 * j, rtmp + bs2 * jtmp[j], bs2));
16802efa7f71SHong Zhang       j++;
16812efa7f71SHong Zhang     }
16822efa7f71SHong Zhang 
16839566063dSJacob Faibussowitsch     PetscCall(MatBlockAbs_private(nzi, bs2, vtmp, vtmp_abs));
16845f8bbccaSHong Zhang 
16852efa7f71SHong Zhang     bjtmp = bj + bi[i];
16862efa7f71SHong Zhang     batmp = ba + bs2 * bi[i];
16872efa7f71SHong Zhang     /* apply level dropping rule to L part */
16882efa7f71SHong Zhang     ncut = nzi_al + dtcount;
16892efa7f71SHong Zhang     if (ncut < nzi_bl) {
16909566063dSJacob Faibussowitsch       PetscCall(PetscSortSplitReal(ncut, nzi_bl, vtmp_abs, jtmp));
16919566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
16922efa7f71SHong Zhang     } else {
16932efa7f71SHong Zhang       ncut = nzi_bl;
16942efa7f71SHong Zhang     }
16952efa7f71SHong Zhang     for (j = 0; j < ncut; j++) {
16962efa7f71SHong Zhang       bjtmp[j] = jtmp[j];
16979566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(batmp + bs2 * j, rtmp + bs2 * bjtmp[j], bs2));
16982efa7f71SHong Zhang     }
16992efa7f71SHong Zhang     bi[i + 1] = bi[i] + ncut;
17002efa7f71SHong Zhang     nzi       = ncut + 1;
17012efa7f71SHong Zhang 
17022efa7f71SHong Zhang     /* apply level dropping rule to U part */
17032efa7f71SHong Zhang     ncut = nzi_au + dtcount;
17042efa7f71SHong Zhang     if (ncut < nzi_bu) {
17059566063dSJacob Faibussowitsch       PetscCall(PetscSortSplitReal(ncut, nzi_bu, vtmp_abs + nzi_bl + 1, jtmp + nzi_bl + 1));
17069566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
17072efa7f71SHong Zhang     } else {
17082efa7f71SHong Zhang       ncut = nzi_bu;
17092efa7f71SHong Zhang     }
17102efa7f71SHong Zhang     nzi += ncut;
17112efa7f71SHong Zhang 
17122efa7f71SHong Zhang     /* mark bdiagonal */
17132efa7f71SHong Zhang     bdiag[i + 1]    = bdiag[i] - (ncut + 1);
17146bce7ff8SHong Zhang     bi[2 * mbs - i] = bi[2 * mbs - i + 1] - (ncut + 1);
17156bce7ff8SHong Zhang 
17162efa7f71SHong Zhang     bjtmp = bj + bdiag[i];
17172efa7f71SHong Zhang     batmp = ba + bs2 * bdiag[i];
17189566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(batmp, rtmp + bs2 * i, bs2));
17192efa7f71SHong Zhang     *bjtmp = i;
17205f8bbccaSHong Zhang 
17212efa7f71SHong Zhang     bjtmp = bj + bdiag[i + 1] + 1;
17222efa7f71SHong Zhang     batmp = ba + (bdiag[i + 1] + 1) * bs2;
17232efa7f71SHong Zhang 
17242efa7f71SHong Zhang     for (k = 0; k < ncut; k++) {
17252efa7f71SHong Zhang       bjtmp[k] = jtmp[nzi_bl + 1 + k];
17269566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(batmp + bs2 * k, rtmp + bs2 * bjtmp[k], bs2));
17272efa7f71SHong Zhang     }
17282efa7f71SHong Zhang 
17292efa7f71SHong Zhang     im[i] = nzi; /* used by PetscLLAddSortedLU() */
17302efa7f71SHong Zhang 
1731a5b23f4aSJose E. Roman     /* invert diagonal block for simpler triangular solves - add shift??? */
17322efa7f71SHong Zhang     batmp = ba + bs2 * bdiag[i];
17335f8bbccaSHong Zhang 
17349566063dSJacob Faibussowitsch     PetscCall(PetscKernel_A_gets_inverse_A(bs, batmp, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
17357b6c816cSBarry Smith     if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
17362efa7f71SHong Zhang   } /* for (i=0; i<mbs; i++) */
17379566063dSJacob Faibussowitsch   PetscCall(PetscFree3(v_work, multiplier, v_pivots));
17382efa7f71SHong Zhang 
17396da515a1SHong Zhang   /* printf("end of L %d, beginning of U %d\n",bi[mbs],bdiag[mbs]); */
174094bad497SJacob Faibussowitsch   PetscCheck(bi[mbs] < bdiag[mbs], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[mbs], bdiag[mbs]);
17412efa7f71SHong Zhang 
17429566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
17439566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
17442efa7f71SHong Zhang 
17459566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
17462efa7f71SHong Zhang 
17479566063dSJacob Faibussowitsch   PetscCall(PetscFree2(im, jtmp));
17489566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rtmp, vtmp));
17492efa7f71SHong Zhang 
17509566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(bs2 * B->cmap->n));
17512efa7f71SHong Zhang   b->maxnz = b->nz = bi[mbs] + bdiag[0] - bdiag[mbs];
17522efa7f71SHong Zhang 
17539566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
17549566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &icol_identity));
17552efa7f71SHong Zhang   if (row_identity && icol_identity) {
17564dd39f65SShri Abhyankar     B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering;
17572efa7f71SHong Zhang   } else {
17584dd39f65SShri Abhyankar     B->ops->solve = MatSolve_SeqBAIJ_N;
17592efa7f71SHong Zhang   }
17602efa7f71SHong Zhang 
1761f4259b30SLisandro Dalcin   B->ops->solveadd          = NULL;
1762f4259b30SLisandro Dalcin   B->ops->solvetranspose    = NULL;
1763f4259b30SLisandro Dalcin   B->ops->solvetransposeadd = NULL;
1764f4259b30SLisandro Dalcin   B->ops->matsolve          = NULL;
17652efa7f71SHong Zhang   B->assembled              = PETSC_TRUE;
17662efa7f71SHong Zhang   B->preallocated           = PETSC_TRUE;
17673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1768c8342467SHong Zhang }
1769