xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ff6a95418ff72e09afb68819ffbfc86bad111fa0)
1be1d678aSKris Buschelman 
22593348eSBarry Smith /*
3b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
42593348eSBarry Smith   matrix storage format.
52593348eSBarry Smith */
6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
7c6db04a5SJed Brown #include <petscblaslapack.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
1043516a2dSKris Buschelman 
117ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
127ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
137ea3e4caSstefano_zampini #endif
147ea3e4caSstefano_zampini 
15b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
17b5b72c8aSIrina Sokolova #endif
18c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
19b5b72c8aSIrina Sokolova 
20*ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
21d71ae5a4SJacob Faibussowitsch {
229463ebdaSPierre Jolivet   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
23*ff6a9541SJacob Faibussowitsch   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
249463ebdaSPierre Jolivet   MatScalar   *a_val = a_aij->a;
259463ebdaSPierre Jolivet 
269463ebdaSPierre Jolivet   PetscFunctionBegin;
279566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
28*ff6a9541SJacob Faibussowitsch   PetscCall(PetscArrayzero(reductions, n));
299463ebdaSPierre Jolivet   if (type == NORM_2) {
30*ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
319463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
329463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
33857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
349463ebdaSPierre Jolivet           a_val++;
359463ebdaSPierre Jolivet         }
369463ebdaSPierre Jolivet       }
379463ebdaSPierre Jolivet     }
389463ebdaSPierre Jolivet   } else if (type == NORM_1) {
39*ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
409463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
419463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
42857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
439463ebdaSPierre Jolivet           a_val++;
449463ebdaSPierre Jolivet         }
459463ebdaSPierre Jolivet       }
469463ebdaSPierre Jolivet     }
479463ebdaSPierre Jolivet   } else if (type == NORM_INFINITY) {
48*ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
499463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
509463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
519463ebdaSPierre Jolivet           int col         = A->cmap->rstart + a_aij->j[i] * bs + jb;
52857cbf51SRichard Tran Mills           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
539463ebdaSPierre Jolivet           a_val++;
549463ebdaSPierre Jolivet         }
559463ebdaSPierre Jolivet       }
569463ebdaSPierre Jolivet     }
57857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
58*ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
59857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
60857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
61857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
62857cbf51SRichard Tran Mills           a_val++;
63857cbf51SRichard Tran Mills         }
64857cbf51SRichard Tran Mills       }
65857cbf51SRichard Tran Mills     }
66857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
67*ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
68857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
69857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
70857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
71857cbf51SRichard Tran Mills           a_val++;
72857cbf51SRichard Tran Mills         }
73857cbf51SRichard Tran Mills       }
74857cbf51SRichard Tran Mills     }
75857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
769463ebdaSPierre Jolivet   if (type == NORM_2) {
77*ff6a9541SJacob Faibussowitsch     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
78857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79*ff6a9541SJacob Faibussowitsch     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
809463ebdaSPierre Jolivet   }
813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
829463ebdaSPierre Jolivet }
839463ebdaSPierre Jolivet 
84d71ae5a4SJacob Faibussowitsch PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
85d71ae5a4SJacob Faibussowitsch {
86b01c7715SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
87de80f912SBarry Smith   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
887f0c90edSBarry Smith   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
8962bba022SBarry Smith   PetscReal    shift = 0.0;
901a9391e3SHong Zhang   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
91b01c7715SBarry Smith 
92b01c7715SBarry Smith   PetscFunctionBegin;
93a455e926SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
94a455e926SHong Zhang 
959797317bSBarry Smith   if (a->idiagvalid) {
969797317bSBarry Smith     if (values) *values = a->idiag;
973ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
989797317bSBarry Smith   }
999566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
100b01c7715SBarry Smith   diag_offset = a->diag;
1014dfa11a4SJacob Faibussowitsch   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
102b01c7715SBarry Smith   diag = a->idiag;
103bbead8a2SBarry Smith   if (values) *values = a->idiag;
104b01c7715SBarry Smith   /* factor and invert each block */
105521d7252SBarry Smith   switch (bs) {
106ab040260SJed Brown   case 1:
107ab040260SJed Brown     for (i = 0; i < mbs; i++) {
108ab040260SJed Brown       odiag   = v + 1 * diag_offset[i];
109ab040260SJed Brown       diag[0] = odiag[0];
110ec1892c8SHong Zhang 
111ec1892c8SHong Zhang       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
112ec1892c8SHong Zhang         if (allowzeropivot) {
1137b6c816cSBarry Smith           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1147b6c816cSBarry Smith           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
1157b6c816cSBarry Smith           A->factorerror_zeropivot_row   = i;
1169566063dSJacob Faibussowitsch           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
11798921bdaSJacob Faibussowitsch         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
118ec1892c8SHong Zhang       }
119ec1892c8SHong Zhang 
120d4a378daSJed Brown       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
121ab040260SJed Brown       diag += 1;
122ab040260SJed Brown     }
123ab040260SJed Brown     break;
124b01c7715SBarry Smith   case 2:
125b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
126b01c7715SBarry Smith       odiag   = v + 4 * diag_offset[i];
1279371c9d4SSatish Balay       diag[0] = odiag[0];
1289371c9d4SSatish Balay       diag[1] = odiag[1];
1299371c9d4SSatish Balay       diag[2] = odiag[2];
1309371c9d4SSatish Balay       diag[3] = odiag[3];
1319566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1327b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
133b01c7715SBarry Smith       diag += 4;
134b01c7715SBarry Smith     }
135b01c7715SBarry Smith     break;
136b01c7715SBarry Smith   case 3:
137b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
138b01c7715SBarry Smith       odiag   = v + 9 * diag_offset[i];
1399371c9d4SSatish Balay       diag[0] = odiag[0];
1409371c9d4SSatish Balay       diag[1] = odiag[1];
1419371c9d4SSatish Balay       diag[2] = odiag[2];
1429371c9d4SSatish Balay       diag[3] = odiag[3];
1439371c9d4SSatish Balay       diag[4] = odiag[4];
1449371c9d4SSatish Balay       diag[5] = odiag[5];
1459371c9d4SSatish Balay       diag[6] = odiag[6];
1469371c9d4SSatish Balay       diag[7] = odiag[7];
147b01c7715SBarry Smith       diag[8] = odiag[8];
1489566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
1497b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
150b01c7715SBarry Smith       diag += 9;
151b01c7715SBarry Smith     }
152b01c7715SBarry Smith     break;
153b01c7715SBarry Smith   case 4:
154b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
155b01c7715SBarry Smith       odiag = v + 16 * diag_offset[i];
1569566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 16));
1579566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
1587b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
159b01c7715SBarry Smith       diag += 16;
160b01c7715SBarry Smith     }
161b01c7715SBarry Smith     break;
162b01c7715SBarry Smith   case 5:
163b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
164b01c7715SBarry Smith       odiag = v + 25 * diag_offset[i];
1659566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 25));
1669566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
1677b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
168b01c7715SBarry Smith       diag += 25;
169b01c7715SBarry Smith     }
170b01c7715SBarry Smith     break;
171d49b2adcSBarry Smith   case 6:
172d49b2adcSBarry Smith     for (i = 0; i < mbs; i++) {
173d49b2adcSBarry Smith       odiag = v + 36 * diag_offset[i];
1749566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 36));
1759566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
1767b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
177d49b2adcSBarry Smith       diag += 36;
178d49b2adcSBarry Smith     }
179d49b2adcSBarry Smith     break;
180de80f912SBarry Smith   case 7:
181de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
182de80f912SBarry Smith       odiag = v + 49 * diag_offset[i];
1839566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 49));
1849566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
1857b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
186de80f912SBarry Smith       diag += 49;
187de80f912SBarry Smith     }
188de80f912SBarry Smith     break;
189b01c7715SBarry Smith   default:
1909566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
191de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
192de80f912SBarry Smith       odiag = v + bs2 * diag_offset[i];
1939566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, bs2));
1949566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
1957b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
196de80f912SBarry Smith       diag += bs2;
197de80f912SBarry Smith     }
1989566063dSJacob Faibussowitsch     PetscCall(PetscFree2(v_work, v_pivots));
199b01c7715SBarry Smith   }
200b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
2013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
202b01c7715SBarry Smith }
203b01c7715SBarry Smith 
204d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
205d71ae5a4SJacob Faibussowitsch {
2066d3beeddSMatthew Knepley   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
207e48d15efSToby Isaac   PetscScalar       *x, *work, *w, *workt, *t;
208e48d15efSToby Isaac   const MatScalar   *v, *aa = a->a, *idiag;
209e48d15efSToby Isaac   const PetscScalar *b, *xb;
2105455b99fSToby Isaac   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
211e48d15efSToby Isaac   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
212c1ac3661SBarry Smith   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
213b01c7715SBarry Smith 
214b01c7715SBarry Smith   PetscFunctionBegin;
215b01c7715SBarry Smith   its = its * lits;
2165f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
2175f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2185f80ce2aSJacob Faibussowitsch   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
2195f80ce2aSJacob Faibussowitsch   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
2205f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
221b01c7715SBarry Smith 
2229566063dSJacob Faibussowitsch   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
223b01c7715SBarry Smith 
2243ba16761SJacob Faibussowitsch   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
225b01c7715SBarry Smith   diag  = a->diag;
226b01c7715SBarry Smith   idiag = a->idiag;
227de80f912SBarry Smith   k     = PetscMax(A->rmap->n, A->cmap->n);
22848a46eb9SPierre Jolivet   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
22948a46eb9SPierre Jolivet   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
23048a46eb9SPierre Jolivet   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
2313475c22fSBarry Smith   work = a->mult_work;
2323475c22fSBarry Smith   t    = a->sor_workt;
233de80f912SBarry Smith   w    = a->sor_work;
234de80f912SBarry Smith 
2359566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
2369566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
237de80f912SBarry Smith 
238de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
239de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
240e48d15efSToby Isaac       switch (bs) {
241e48d15efSToby Isaac       case 1:
242e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
243e48d15efSToby Isaac         t[0] = b[0];
244e48d15efSToby Isaac         i2   = 1;
245e48d15efSToby Isaac         idiag += 1;
246e48d15efSToby Isaac         for (i = 1; i < m; i++) {
247e48d15efSToby Isaac           v    = aa + ai[i];
248e48d15efSToby Isaac           vi   = aj + ai[i];
249e48d15efSToby Isaac           nz   = diag[i] - ai[i];
250e48d15efSToby Isaac           s[0] = b[i2];
251e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
252e48d15efSToby Isaac             xw[0] = x[vi[j]];
253e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
254e48d15efSToby Isaac           }
255e48d15efSToby Isaac           t[i2] = s[0];
256e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
257e48d15efSToby Isaac           x[i2] = xw[0];
258e48d15efSToby Isaac           idiag += 1;
259e48d15efSToby Isaac           i2 += 1;
260e48d15efSToby Isaac         }
261e48d15efSToby Isaac         break;
262e48d15efSToby Isaac       case 2:
263e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
2649371c9d4SSatish Balay         t[0] = b[0];
2659371c9d4SSatish Balay         t[1] = b[1];
266e48d15efSToby Isaac         i2   = 2;
267e48d15efSToby Isaac         idiag += 4;
268e48d15efSToby Isaac         for (i = 1; i < m; i++) {
269e48d15efSToby Isaac           v    = aa + 4 * ai[i];
270e48d15efSToby Isaac           vi   = aj + ai[i];
271e48d15efSToby Isaac           nz   = diag[i] - ai[i];
2729371c9d4SSatish Balay           s[0] = b[i2];
2739371c9d4SSatish Balay           s[1] = b[i2 + 1];
274e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
275e48d15efSToby Isaac             idx   = 2 * vi[j];
276e48d15efSToby Isaac             it    = 4 * j;
2779371c9d4SSatish Balay             xw[0] = x[idx];
2789371c9d4SSatish Balay             xw[1] = x[1 + idx];
279e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
280e48d15efSToby Isaac           }
2819371c9d4SSatish Balay           t[i2]     = s[0];
2829371c9d4SSatish Balay           t[i2 + 1] = s[1];
283e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
2849371c9d4SSatish Balay           x[i2]     = xw[0];
2859371c9d4SSatish Balay           x[i2 + 1] = xw[1];
286e48d15efSToby Isaac           idiag += 4;
287e48d15efSToby Isaac           i2 += 2;
288e48d15efSToby Isaac         }
289e48d15efSToby Isaac         break;
290e48d15efSToby Isaac       case 3:
291e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
2929371c9d4SSatish Balay         t[0] = b[0];
2939371c9d4SSatish Balay         t[1] = b[1];
2949371c9d4SSatish Balay         t[2] = b[2];
295e48d15efSToby Isaac         i2   = 3;
296e48d15efSToby Isaac         idiag += 9;
297e48d15efSToby Isaac         for (i = 1; i < m; i++) {
298e48d15efSToby Isaac           v    = aa + 9 * ai[i];
299e48d15efSToby Isaac           vi   = aj + ai[i];
300e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3019371c9d4SSatish Balay           s[0] = b[i2];
3029371c9d4SSatish Balay           s[1] = b[i2 + 1];
3039371c9d4SSatish Balay           s[2] = b[i2 + 2];
304e48d15efSToby Isaac           while (nz--) {
305e48d15efSToby Isaac             idx   = 3 * (*vi++);
3069371c9d4SSatish Balay             xw[0] = x[idx];
3079371c9d4SSatish Balay             xw[1] = x[1 + idx];
3089371c9d4SSatish Balay             xw[2] = x[2 + idx];
309e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
310e48d15efSToby Isaac             v += 9;
311e48d15efSToby Isaac           }
3129371c9d4SSatish Balay           t[i2]     = s[0];
3139371c9d4SSatish Balay           t[i2 + 1] = s[1];
3149371c9d4SSatish Balay           t[i2 + 2] = s[2];
315e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
3169371c9d4SSatish Balay           x[i2]     = xw[0];
3179371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3189371c9d4SSatish Balay           x[i2 + 2] = xw[2];
319e48d15efSToby Isaac           idiag += 9;
320e48d15efSToby Isaac           i2 += 3;
321e48d15efSToby Isaac         }
322e48d15efSToby Isaac         break;
323e48d15efSToby Isaac       case 4:
324e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
3259371c9d4SSatish Balay         t[0] = b[0];
3269371c9d4SSatish Balay         t[1] = b[1];
3279371c9d4SSatish Balay         t[2] = b[2];
3289371c9d4SSatish Balay         t[3] = b[3];
329e48d15efSToby Isaac         i2   = 4;
330e48d15efSToby Isaac         idiag += 16;
331e48d15efSToby Isaac         for (i = 1; i < m; i++) {
332e48d15efSToby Isaac           v    = aa + 16 * ai[i];
333e48d15efSToby Isaac           vi   = aj + ai[i];
334e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3359371c9d4SSatish Balay           s[0] = b[i2];
3369371c9d4SSatish Balay           s[1] = b[i2 + 1];
3379371c9d4SSatish Balay           s[2] = b[i2 + 2];
3389371c9d4SSatish Balay           s[3] = b[i2 + 3];
339e48d15efSToby Isaac           while (nz--) {
340e48d15efSToby Isaac             idx   = 4 * (*vi++);
3419371c9d4SSatish Balay             xw[0] = x[idx];
3429371c9d4SSatish Balay             xw[1] = x[1 + idx];
3439371c9d4SSatish Balay             xw[2] = x[2 + idx];
3449371c9d4SSatish Balay             xw[3] = x[3 + idx];
345e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
346e48d15efSToby Isaac             v += 16;
347e48d15efSToby Isaac           }
3489371c9d4SSatish Balay           t[i2]     = s[0];
3499371c9d4SSatish Balay           t[i2 + 1] = s[1];
3509371c9d4SSatish Balay           t[i2 + 2] = s[2];
3519371c9d4SSatish Balay           t[i2 + 3] = s[3];
352e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
3539371c9d4SSatish Balay           x[i2]     = xw[0];
3549371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3559371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3569371c9d4SSatish Balay           x[i2 + 3] = xw[3];
357e48d15efSToby Isaac           idiag += 16;
358e48d15efSToby Isaac           i2 += 4;
359e48d15efSToby Isaac         }
360e48d15efSToby Isaac         break;
361e48d15efSToby Isaac       case 5:
362e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
3639371c9d4SSatish Balay         t[0] = b[0];
3649371c9d4SSatish Balay         t[1] = b[1];
3659371c9d4SSatish Balay         t[2] = b[2];
3669371c9d4SSatish Balay         t[3] = b[3];
3679371c9d4SSatish Balay         t[4] = b[4];
368e48d15efSToby Isaac         i2   = 5;
369e48d15efSToby Isaac         idiag += 25;
370e48d15efSToby Isaac         for (i = 1; i < m; i++) {
371e48d15efSToby Isaac           v    = aa + 25 * ai[i];
372e48d15efSToby Isaac           vi   = aj + ai[i];
373e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3749371c9d4SSatish Balay           s[0] = b[i2];
3759371c9d4SSatish Balay           s[1] = b[i2 + 1];
3769371c9d4SSatish Balay           s[2] = b[i2 + 2];
3779371c9d4SSatish Balay           s[3] = b[i2 + 3];
3789371c9d4SSatish Balay           s[4] = b[i2 + 4];
379e48d15efSToby Isaac           while (nz--) {
380e48d15efSToby Isaac             idx   = 5 * (*vi++);
3819371c9d4SSatish Balay             xw[0] = x[idx];
3829371c9d4SSatish Balay             xw[1] = x[1 + idx];
3839371c9d4SSatish Balay             xw[2] = x[2 + idx];
3849371c9d4SSatish Balay             xw[3] = x[3 + idx];
3859371c9d4SSatish Balay             xw[4] = x[4 + idx];
386e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
387e48d15efSToby Isaac             v += 25;
388e48d15efSToby Isaac           }
3899371c9d4SSatish Balay           t[i2]     = s[0];
3909371c9d4SSatish Balay           t[i2 + 1] = s[1];
3919371c9d4SSatish Balay           t[i2 + 2] = s[2];
3929371c9d4SSatish Balay           t[i2 + 3] = s[3];
3939371c9d4SSatish Balay           t[i2 + 4] = s[4];
394e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
3959371c9d4SSatish Balay           x[i2]     = xw[0];
3969371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3979371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3989371c9d4SSatish Balay           x[i2 + 3] = xw[3];
3999371c9d4SSatish Balay           x[i2 + 4] = xw[4];
400e48d15efSToby Isaac           idiag += 25;
401e48d15efSToby Isaac           i2 += 5;
402e48d15efSToby Isaac         }
403e48d15efSToby Isaac         break;
404e48d15efSToby Isaac       case 6:
405e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
4069371c9d4SSatish Balay         t[0] = b[0];
4079371c9d4SSatish Balay         t[1] = b[1];
4089371c9d4SSatish Balay         t[2] = b[2];
4099371c9d4SSatish Balay         t[3] = b[3];
4109371c9d4SSatish Balay         t[4] = b[4];
4119371c9d4SSatish Balay         t[5] = b[5];
412e48d15efSToby Isaac         i2   = 6;
413e48d15efSToby Isaac         idiag += 36;
414e48d15efSToby Isaac         for (i = 1; i < m; i++) {
415e48d15efSToby Isaac           v    = aa + 36 * ai[i];
416e48d15efSToby Isaac           vi   = aj + ai[i];
417e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4189371c9d4SSatish Balay           s[0] = b[i2];
4199371c9d4SSatish Balay           s[1] = b[i2 + 1];
4209371c9d4SSatish Balay           s[2] = b[i2 + 2];
4219371c9d4SSatish Balay           s[3] = b[i2 + 3];
4229371c9d4SSatish Balay           s[4] = b[i2 + 4];
4239371c9d4SSatish Balay           s[5] = b[i2 + 5];
424e48d15efSToby Isaac           while (nz--) {
425e48d15efSToby Isaac             idx   = 6 * (*vi++);
4269371c9d4SSatish Balay             xw[0] = x[idx];
4279371c9d4SSatish Balay             xw[1] = x[1 + idx];
4289371c9d4SSatish Balay             xw[2] = x[2 + idx];
4299371c9d4SSatish Balay             xw[3] = x[3 + idx];
4309371c9d4SSatish Balay             xw[4] = x[4 + idx];
4319371c9d4SSatish Balay             xw[5] = x[5 + idx];
432e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
433e48d15efSToby Isaac             v += 36;
434e48d15efSToby Isaac           }
4359371c9d4SSatish Balay           t[i2]     = s[0];
4369371c9d4SSatish Balay           t[i2 + 1] = s[1];
4379371c9d4SSatish Balay           t[i2 + 2] = s[2];
4389371c9d4SSatish Balay           t[i2 + 3] = s[3];
4399371c9d4SSatish Balay           t[i2 + 4] = s[4];
4409371c9d4SSatish Balay           t[i2 + 5] = s[5];
441e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
4429371c9d4SSatish Balay           x[i2]     = xw[0];
4439371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4449371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4459371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4469371c9d4SSatish Balay           x[i2 + 4] = xw[4];
4479371c9d4SSatish Balay           x[i2 + 5] = xw[5];
448e48d15efSToby Isaac           idiag += 36;
449e48d15efSToby Isaac           i2 += 6;
450e48d15efSToby Isaac         }
451e48d15efSToby Isaac         break;
452e48d15efSToby Isaac       case 7:
453e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
4549371c9d4SSatish Balay         t[0] = b[0];
4559371c9d4SSatish Balay         t[1] = b[1];
4569371c9d4SSatish Balay         t[2] = b[2];
4579371c9d4SSatish Balay         t[3] = b[3];
4589371c9d4SSatish Balay         t[4] = b[4];
4599371c9d4SSatish Balay         t[5] = b[5];
4609371c9d4SSatish Balay         t[6] = b[6];
461e48d15efSToby Isaac         i2   = 7;
462e48d15efSToby Isaac         idiag += 49;
463e48d15efSToby Isaac         for (i = 1; i < m; i++) {
464e48d15efSToby Isaac           v    = aa + 49 * ai[i];
465e48d15efSToby Isaac           vi   = aj + ai[i];
466e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4679371c9d4SSatish Balay           s[0] = b[i2];
4689371c9d4SSatish Balay           s[1] = b[i2 + 1];
4699371c9d4SSatish Balay           s[2] = b[i2 + 2];
4709371c9d4SSatish Balay           s[3] = b[i2 + 3];
4719371c9d4SSatish Balay           s[4] = b[i2 + 4];
4729371c9d4SSatish Balay           s[5] = b[i2 + 5];
4739371c9d4SSatish Balay           s[6] = b[i2 + 6];
474e48d15efSToby Isaac           while (nz--) {
475e48d15efSToby Isaac             idx   = 7 * (*vi++);
4769371c9d4SSatish Balay             xw[0] = x[idx];
4779371c9d4SSatish Balay             xw[1] = x[1 + idx];
4789371c9d4SSatish Balay             xw[2] = x[2 + idx];
4799371c9d4SSatish Balay             xw[3] = x[3 + idx];
4809371c9d4SSatish Balay             xw[4] = x[4 + idx];
4819371c9d4SSatish Balay             xw[5] = x[5 + idx];
4829371c9d4SSatish Balay             xw[6] = x[6 + idx];
483e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
484e48d15efSToby Isaac             v += 49;
485e48d15efSToby Isaac           }
4869371c9d4SSatish Balay           t[i2]     = s[0];
4879371c9d4SSatish Balay           t[i2 + 1] = s[1];
4889371c9d4SSatish Balay           t[i2 + 2] = s[2];
4899371c9d4SSatish Balay           t[i2 + 3] = s[3];
4909371c9d4SSatish Balay           t[i2 + 4] = s[4];
4919371c9d4SSatish Balay           t[i2 + 5] = s[5];
4929371c9d4SSatish Balay           t[i2 + 6] = s[6];
493e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
4949371c9d4SSatish Balay           x[i2]     = xw[0];
4959371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4969371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4979371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4989371c9d4SSatish Balay           x[i2 + 4] = xw[4];
4999371c9d4SSatish Balay           x[i2 + 5] = xw[5];
5009371c9d4SSatish Balay           x[i2 + 6] = xw[6];
501e48d15efSToby Isaac           idiag += 49;
502e48d15efSToby Isaac           i2 += 7;
503e48d15efSToby Isaac         }
504e48d15efSToby Isaac         break;
505e48d15efSToby Isaac       default:
50696b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
5079566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(t, b, bs));
508de80f912SBarry Smith         i2 = bs;
509de80f912SBarry Smith         idiag += bs2;
510de80f912SBarry Smith         for (i = 1; i < m; i++) {
511de80f912SBarry Smith           v  = aa + bs2 * ai[i];
512de80f912SBarry Smith           vi = aj + ai[i];
513de80f912SBarry Smith           nz = diag[i] - ai[i];
514de80f912SBarry Smith 
5159566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
516de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
517de80f912SBarry Smith           workt = work;
518de80f912SBarry Smith           for (j = 0; j < nz; j++) {
5199566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
520de80f912SBarry Smith             workt += bs;
521de80f912SBarry Smith           }
52296b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
5239566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(t + i2, w, bs));
52496b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
525de80f912SBarry Smith 
526de80f912SBarry Smith           idiag += bs2;
527de80f912SBarry Smith           i2 += bs;
528de80f912SBarry Smith         }
529e48d15efSToby Isaac         break;
530e48d15efSToby Isaac       }
531de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
5329566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
533e48d15efSToby Isaac       xb = t;
5349371c9d4SSatish Balay     } else xb = b;
535de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
536e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
537e48d15efSToby Isaac       i2    = bs * (m - 1);
538e48d15efSToby Isaac       switch (bs) {
539e48d15efSToby Isaac       case 1:
540e48d15efSToby Isaac         s[0] = xb[i2];
541e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
542e48d15efSToby Isaac         x[i2] = xw[0];
543e48d15efSToby Isaac         i2 -= 1;
544e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
545e48d15efSToby Isaac           v    = aa + (diag[i] + 1);
546e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
547e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
548e48d15efSToby Isaac           s[0] = xb[i2];
549e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
550e48d15efSToby Isaac             xw[0] = x[vi[j]];
551e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
552e48d15efSToby Isaac           }
553e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
554e48d15efSToby Isaac           x[i2] = xw[0];
555e48d15efSToby Isaac           idiag -= 1;
556e48d15efSToby Isaac           i2 -= 1;
557e48d15efSToby Isaac         }
558e48d15efSToby Isaac         break;
559e48d15efSToby Isaac       case 2:
5609371c9d4SSatish Balay         s[0] = xb[i2];
5619371c9d4SSatish Balay         s[1] = xb[i2 + 1];
562e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5639371c9d4SSatish Balay         x[i2]     = xw[0];
5649371c9d4SSatish Balay         x[i2 + 1] = xw[1];
565e48d15efSToby Isaac         i2 -= 2;
566e48d15efSToby Isaac         idiag -= 4;
567e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
568e48d15efSToby Isaac           v    = aa + 4 * (diag[i] + 1);
569e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
570e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
5719371c9d4SSatish Balay           s[0] = xb[i2];
5729371c9d4SSatish Balay           s[1] = xb[i2 + 1];
573e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
574e48d15efSToby Isaac             idx   = 2 * vi[j];
575e48d15efSToby Isaac             it    = 4 * j;
5769371c9d4SSatish Balay             xw[0] = x[idx];
5779371c9d4SSatish Balay             xw[1] = x[1 + idx];
578e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
579e48d15efSToby Isaac           }
580e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5819371c9d4SSatish Balay           x[i2]     = xw[0];
5829371c9d4SSatish Balay           x[i2 + 1] = xw[1];
583e48d15efSToby Isaac           idiag -= 4;
584e48d15efSToby Isaac           i2 -= 2;
585e48d15efSToby Isaac         }
586e48d15efSToby Isaac         break;
587e48d15efSToby Isaac       case 3:
5889371c9d4SSatish Balay         s[0] = xb[i2];
5899371c9d4SSatish Balay         s[1] = xb[i2 + 1];
5909371c9d4SSatish Balay         s[2] = xb[i2 + 2];
591e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
5929371c9d4SSatish Balay         x[i2]     = xw[0];
5939371c9d4SSatish Balay         x[i2 + 1] = xw[1];
5949371c9d4SSatish Balay         x[i2 + 2] = xw[2];
595e48d15efSToby Isaac         i2 -= 3;
596e48d15efSToby Isaac         idiag -= 9;
597e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
598e48d15efSToby Isaac           v    = aa + 9 * (diag[i] + 1);
599e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
600e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6019371c9d4SSatish Balay           s[0] = xb[i2];
6029371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6039371c9d4SSatish Balay           s[2] = xb[i2 + 2];
604e48d15efSToby Isaac           while (nz--) {
605e48d15efSToby Isaac             idx   = 3 * (*vi++);
6069371c9d4SSatish Balay             xw[0] = x[idx];
6079371c9d4SSatish Balay             xw[1] = x[1 + idx];
6089371c9d4SSatish Balay             xw[2] = x[2 + idx];
609e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
610e48d15efSToby Isaac             v += 9;
611e48d15efSToby Isaac           }
612e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6139371c9d4SSatish Balay           x[i2]     = xw[0];
6149371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6159371c9d4SSatish Balay           x[i2 + 2] = xw[2];
616e48d15efSToby Isaac           idiag -= 9;
617e48d15efSToby Isaac           i2 -= 3;
618e48d15efSToby Isaac         }
619e48d15efSToby Isaac         break;
620e48d15efSToby Isaac       case 4:
6219371c9d4SSatish Balay         s[0] = xb[i2];
6229371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6239371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6249371c9d4SSatish Balay         s[3] = xb[i2 + 3];
625e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6269371c9d4SSatish Balay         x[i2]     = xw[0];
6279371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6289371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6299371c9d4SSatish Balay         x[i2 + 3] = xw[3];
630e48d15efSToby Isaac         i2 -= 4;
631e48d15efSToby Isaac         idiag -= 16;
632e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
633e48d15efSToby Isaac           v    = aa + 16 * (diag[i] + 1);
634e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
635e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6369371c9d4SSatish Balay           s[0] = xb[i2];
6379371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6389371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6399371c9d4SSatish Balay           s[3] = xb[i2 + 3];
640e48d15efSToby Isaac           while (nz--) {
641e48d15efSToby Isaac             idx   = 4 * (*vi++);
6429371c9d4SSatish Balay             xw[0] = x[idx];
6439371c9d4SSatish Balay             xw[1] = x[1 + idx];
6449371c9d4SSatish Balay             xw[2] = x[2 + idx];
6459371c9d4SSatish Balay             xw[3] = x[3 + idx];
646e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
647e48d15efSToby Isaac             v += 16;
648e48d15efSToby Isaac           }
649e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6509371c9d4SSatish Balay           x[i2]     = xw[0];
6519371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6529371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6539371c9d4SSatish Balay           x[i2 + 3] = xw[3];
654e48d15efSToby Isaac           idiag -= 16;
655e48d15efSToby Isaac           i2 -= 4;
656e48d15efSToby Isaac         }
657e48d15efSToby Isaac         break;
658e48d15efSToby Isaac       case 5:
6599371c9d4SSatish Balay         s[0] = xb[i2];
6609371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6619371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6629371c9d4SSatish Balay         s[3] = xb[i2 + 3];
6639371c9d4SSatish Balay         s[4] = xb[i2 + 4];
664e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6659371c9d4SSatish Balay         x[i2]     = xw[0];
6669371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6679371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6689371c9d4SSatish Balay         x[i2 + 3] = xw[3];
6699371c9d4SSatish Balay         x[i2 + 4] = xw[4];
670e48d15efSToby Isaac         i2 -= 5;
671e48d15efSToby Isaac         idiag -= 25;
672e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
673e48d15efSToby Isaac           v    = aa + 25 * (diag[i] + 1);
674e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
675e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6769371c9d4SSatish Balay           s[0] = xb[i2];
6779371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6789371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6799371c9d4SSatish Balay           s[3] = xb[i2 + 3];
6809371c9d4SSatish Balay           s[4] = xb[i2 + 4];
681e48d15efSToby Isaac           while (nz--) {
682e48d15efSToby Isaac             idx   = 5 * (*vi++);
6839371c9d4SSatish Balay             xw[0] = x[idx];
6849371c9d4SSatish Balay             xw[1] = x[1 + idx];
6859371c9d4SSatish Balay             xw[2] = x[2 + idx];
6869371c9d4SSatish Balay             xw[3] = x[3 + idx];
6879371c9d4SSatish Balay             xw[4] = x[4 + idx];
688e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
689e48d15efSToby Isaac             v += 25;
690e48d15efSToby Isaac           }
691e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6929371c9d4SSatish Balay           x[i2]     = xw[0];
6939371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6949371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6959371c9d4SSatish Balay           x[i2 + 3] = xw[3];
6969371c9d4SSatish Balay           x[i2 + 4] = xw[4];
697e48d15efSToby Isaac           idiag -= 25;
698e48d15efSToby Isaac           i2 -= 5;
699e48d15efSToby Isaac         }
700e48d15efSToby Isaac         break;
701e48d15efSToby Isaac       case 6:
7029371c9d4SSatish Balay         s[0] = xb[i2];
7039371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7049371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7059371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7069371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7079371c9d4SSatish Balay         s[5] = xb[i2 + 5];
708e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7099371c9d4SSatish Balay         x[i2]     = xw[0];
7109371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7119371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7129371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7139371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7149371c9d4SSatish Balay         x[i2 + 5] = xw[5];
715e48d15efSToby Isaac         i2 -= 6;
716e48d15efSToby Isaac         idiag -= 36;
717e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
718e48d15efSToby Isaac           v    = aa + 36 * (diag[i] + 1);
719e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
720e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7219371c9d4SSatish Balay           s[0] = xb[i2];
7229371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7239371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7249371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7259371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7269371c9d4SSatish Balay           s[5] = xb[i2 + 5];
727e48d15efSToby Isaac           while (nz--) {
728e48d15efSToby Isaac             idx   = 6 * (*vi++);
7299371c9d4SSatish Balay             xw[0] = x[idx];
7309371c9d4SSatish Balay             xw[1] = x[1 + idx];
7319371c9d4SSatish Balay             xw[2] = x[2 + idx];
7329371c9d4SSatish Balay             xw[3] = x[3 + idx];
7339371c9d4SSatish Balay             xw[4] = x[4 + idx];
7349371c9d4SSatish Balay             xw[5] = x[5 + idx];
735e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
736e48d15efSToby Isaac             v += 36;
737e48d15efSToby Isaac           }
738e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7399371c9d4SSatish Balay           x[i2]     = xw[0];
7409371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7419371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7429371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7439371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7449371c9d4SSatish Balay           x[i2 + 5] = xw[5];
745e48d15efSToby Isaac           idiag -= 36;
746e48d15efSToby Isaac           i2 -= 6;
747e48d15efSToby Isaac         }
748e48d15efSToby Isaac         break;
749e48d15efSToby Isaac       case 7:
7509371c9d4SSatish Balay         s[0] = xb[i2];
7519371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7529371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7539371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7549371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7559371c9d4SSatish Balay         s[5] = xb[i2 + 5];
7569371c9d4SSatish Balay         s[6] = xb[i2 + 6];
757e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
7589371c9d4SSatish Balay         x[i2]     = xw[0];
7599371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7609371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7619371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7629371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7639371c9d4SSatish Balay         x[i2 + 5] = xw[5];
7649371c9d4SSatish Balay         x[i2 + 6] = xw[6];
765e48d15efSToby Isaac         i2 -= 7;
766e48d15efSToby Isaac         idiag -= 49;
767e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
768e48d15efSToby Isaac           v    = aa + 49 * (diag[i] + 1);
769e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
770e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7719371c9d4SSatish Balay           s[0] = xb[i2];
7729371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7739371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7749371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7759371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7769371c9d4SSatish Balay           s[5] = xb[i2 + 5];
7779371c9d4SSatish Balay           s[6] = xb[i2 + 6];
778e48d15efSToby Isaac           while (nz--) {
779e48d15efSToby Isaac             idx   = 7 * (*vi++);
7809371c9d4SSatish Balay             xw[0] = x[idx];
7819371c9d4SSatish Balay             xw[1] = x[1 + idx];
7829371c9d4SSatish Balay             xw[2] = x[2 + idx];
7839371c9d4SSatish Balay             xw[3] = x[3 + idx];
7849371c9d4SSatish Balay             xw[4] = x[4 + idx];
7859371c9d4SSatish Balay             xw[5] = x[5 + idx];
7869371c9d4SSatish Balay             xw[6] = x[6 + idx];
787e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
788e48d15efSToby Isaac             v += 49;
789e48d15efSToby Isaac           }
790e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
7919371c9d4SSatish Balay           x[i2]     = xw[0];
7929371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7939371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7949371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7959371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7969371c9d4SSatish Balay           x[i2 + 5] = xw[5];
7979371c9d4SSatish Balay           x[i2 + 6] = xw[6];
798e48d15efSToby Isaac           idiag -= 49;
799e48d15efSToby Isaac           i2 -= 7;
800e48d15efSToby Isaac         }
801e48d15efSToby Isaac         break;
802e48d15efSToby Isaac       default:
8039566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(w, xb + i2, bs));
80496b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
805de80f912SBarry Smith         i2 -= bs;
806e48d15efSToby Isaac         idiag -= bs2;
807de80f912SBarry Smith         for (i = m - 2; i >= 0; i--) {
808de80f912SBarry Smith           v  = aa + bs2 * (diag[i] + 1);
809de80f912SBarry Smith           vi = aj + diag[i] + 1;
810de80f912SBarry Smith           nz = ai[i + 1] - diag[i] - 1;
811de80f912SBarry Smith 
8129566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, xb + i2, bs));
813de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
814de80f912SBarry Smith           workt = work;
815de80f912SBarry Smith           for (j = 0; j < nz; j++) {
8169566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
817de80f912SBarry Smith             workt += bs;
818de80f912SBarry Smith           }
81996b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
82096b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
821e48d15efSToby Isaac 
822de80f912SBarry Smith           idiag -= bs2;
823de80f912SBarry Smith           i2 -= bs;
824de80f912SBarry Smith         }
825e48d15efSToby Isaac         break;
826e48d15efSToby Isaac       }
8279566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
828de80f912SBarry Smith     }
829e48d15efSToby Isaac     its--;
830e48d15efSToby Isaac   }
831e48d15efSToby Isaac   while (its--) {
832e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
833e48d15efSToby Isaac       idiag = a->idiag;
834e48d15efSToby Isaac       i2    = 0;
835e48d15efSToby Isaac       switch (bs) {
836e48d15efSToby Isaac       case 1:
837e48d15efSToby Isaac         for (i = 0; i < m; i++) {
838e48d15efSToby Isaac           v    = aa + ai[i];
839e48d15efSToby Isaac           vi   = aj + ai[i];
840e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
841e48d15efSToby Isaac           s[0] = b[i2];
842e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
843e48d15efSToby Isaac             xw[0] = x[vi[j]];
844e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
845e48d15efSToby Isaac           }
846e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
847e48d15efSToby Isaac           x[i2] += xw[0];
848e48d15efSToby Isaac           idiag += 1;
849e48d15efSToby Isaac           i2 += 1;
850e48d15efSToby Isaac         }
851e48d15efSToby Isaac         break;
852e48d15efSToby Isaac       case 2:
853e48d15efSToby Isaac         for (i = 0; i < m; i++) {
854e48d15efSToby Isaac           v    = aa + 4 * ai[i];
855e48d15efSToby Isaac           vi   = aj + ai[i];
856e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8579371c9d4SSatish Balay           s[0] = b[i2];
8589371c9d4SSatish Balay           s[1] = b[i2 + 1];
859e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
860e48d15efSToby Isaac             idx   = 2 * vi[j];
861e48d15efSToby Isaac             it    = 4 * j;
8629371c9d4SSatish Balay             xw[0] = x[idx];
8639371c9d4SSatish Balay             xw[1] = x[1 + idx];
864e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
865e48d15efSToby Isaac           }
866e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
8679371c9d4SSatish Balay           x[i2] += xw[0];
8689371c9d4SSatish Balay           x[i2 + 1] += xw[1];
869e48d15efSToby Isaac           idiag += 4;
870e48d15efSToby Isaac           i2 += 2;
871e48d15efSToby Isaac         }
872e48d15efSToby Isaac         break;
873e48d15efSToby Isaac       case 3:
874e48d15efSToby Isaac         for (i = 0; i < m; i++) {
875e48d15efSToby Isaac           v    = aa + 9 * ai[i];
876e48d15efSToby Isaac           vi   = aj + ai[i];
877e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8789371c9d4SSatish Balay           s[0] = b[i2];
8799371c9d4SSatish Balay           s[1] = b[i2 + 1];
8809371c9d4SSatish Balay           s[2] = b[i2 + 2];
881e48d15efSToby Isaac           while (nz--) {
882e48d15efSToby Isaac             idx   = 3 * (*vi++);
8839371c9d4SSatish Balay             xw[0] = x[idx];
8849371c9d4SSatish Balay             xw[1] = x[1 + idx];
8859371c9d4SSatish Balay             xw[2] = x[2 + idx];
886e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
887e48d15efSToby Isaac             v += 9;
888e48d15efSToby Isaac           }
889e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
8909371c9d4SSatish Balay           x[i2] += xw[0];
8919371c9d4SSatish Balay           x[i2 + 1] += xw[1];
8929371c9d4SSatish Balay           x[i2 + 2] += xw[2];
893e48d15efSToby Isaac           idiag += 9;
894e48d15efSToby Isaac           i2 += 3;
895e48d15efSToby Isaac         }
896e48d15efSToby Isaac         break;
897e48d15efSToby Isaac       case 4:
898e48d15efSToby Isaac         for (i = 0; i < m; i++) {
899e48d15efSToby Isaac           v    = aa + 16 * ai[i];
900e48d15efSToby Isaac           vi   = aj + ai[i];
901e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9029371c9d4SSatish Balay           s[0] = b[i2];
9039371c9d4SSatish Balay           s[1] = b[i2 + 1];
9049371c9d4SSatish Balay           s[2] = b[i2 + 2];
9059371c9d4SSatish Balay           s[3] = b[i2 + 3];
906e48d15efSToby Isaac           while (nz--) {
907e48d15efSToby Isaac             idx   = 4 * (*vi++);
9089371c9d4SSatish Balay             xw[0] = x[idx];
9099371c9d4SSatish Balay             xw[1] = x[1 + idx];
9109371c9d4SSatish Balay             xw[2] = x[2 + idx];
9119371c9d4SSatish Balay             xw[3] = x[3 + idx];
912e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
913e48d15efSToby Isaac             v += 16;
914e48d15efSToby Isaac           }
915e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
9169371c9d4SSatish Balay           x[i2] += xw[0];
9179371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9189371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9199371c9d4SSatish Balay           x[i2 + 3] += xw[3];
920e48d15efSToby Isaac           idiag += 16;
921e48d15efSToby Isaac           i2 += 4;
922e48d15efSToby Isaac         }
923e48d15efSToby Isaac         break;
924e48d15efSToby Isaac       case 5:
925e48d15efSToby Isaac         for (i = 0; i < m; i++) {
926e48d15efSToby Isaac           v    = aa + 25 * ai[i];
927e48d15efSToby Isaac           vi   = aj + ai[i];
928e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9299371c9d4SSatish Balay           s[0] = b[i2];
9309371c9d4SSatish Balay           s[1] = b[i2 + 1];
9319371c9d4SSatish Balay           s[2] = b[i2 + 2];
9329371c9d4SSatish Balay           s[3] = b[i2 + 3];
9339371c9d4SSatish Balay           s[4] = b[i2 + 4];
934e48d15efSToby Isaac           while (nz--) {
935e48d15efSToby Isaac             idx   = 5 * (*vi++);
9369371c9d4SSatish Balay             xw[0] = x[idx];
9379371c9d4SSatish Balay             xw[1] = x[1 + idx];
9389371c9d4SSatish Balay             xw[2] = x[2 + idx];
9399371c9d4SSatish Balay             xw[3] = x[3 + idx];
9409371c9d4SSatish Balay             xw[4] = x[4 + idx];
941e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
942e48d15efSToby Isaac             v += 25;
943e48d15efSToby Isaac           }
944e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
9459371c9d4SSatish Balay           x[i2] += xw[0];
9469371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9479371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9489371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9499371c9d4SSatish Balay           x[i2 + 4] += xw[4];
950e48d15efSToby Isaac           idiag += 25;
951e48d15efSToby Isaac           i2 += 5;
952e48d15efSToby Isaac         }
953e48d15efSToby Isaac         break;
954e48d15efSToby Isaac       case 6:
955e48d15efSToby Isaac         for (i = 0; i < m; i++) {
956e48d15efSToby Isaac           v    = aa + 36 * ai[i];
957e48d15efSToby Isaac           vi   = aj + ai[i];
958e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9599371c9d4SSatish Balay           s[0] = b[i2];
9609371c9d4SSatish Balay           s[1] = b[i2 + 1];
9619371c9d4SSatish Balay           s[2] = b[i2 + 2];
9629371c9d4SSatish Balay           s[3] = b[i2 + 3];
9639371c9d4SSatish Balay           s[4] = b[i2 + 4];
9649371c9d4SSatish Balay           s[5] = b[i2 + 5];
965e48d15efSToby Isaac           while (nz--) {
966e48d15efSToby Isaac             idx   = 6 * (*vi++);
9679371c9d4SSatish Balay             xw[0] = x[idx];
9689371c9d4SSatish Balay             xw[1] = x[1 + idx];
9699371c9d4SSatish Balay             xw[2] = x[2 + idx];
9709371c9d4SSatish Balay             xw[3] = x[3 + idx];
9719371c9d4SSatish Balay             xw[4] = x[4 + idx];
9729371c9d4SSatish Balay             xw[5] = x[5 + idx];
973e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
974e48d15efSToby Isaac             v += 36;
975e48d15efSToby Isaac           }
976e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
9779371c9d4SSatish Balay           x[i2] += xw[0];
9789371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9799371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9809371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9819371c9d4SSatish Balay           x[i2 + 4] += xw[4];
9829371c9d4SSatish Balay           x[i2 + 5] += xw[5];
983e48d15efSToby Isaac           idiag += 36;
984e48d15efSToby Isaac           i2 += 6;
985e48d15efSToby Isaac         }
986e48d15efSToby Isaac         break;
987e48d15efSToby Isaac       case 7:
988e48d15efSToby Isaac         for (i = 0; i < m; i++) {
989e48d15efSToby Isaac           v    = aa + 49 * ai[i];
990e48d15efSToby Isaac           vi   = aj + ai[i];
991e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9929371c9d4SSatish Balay           s[0] = b[i2];
9939371c9d4SSatish Balay           s[1] = b[i2 + 1];
9949371c9d4SSatish Balay           s[2] = b[i2 + 2];
9959371c9d4SSatish Balay           s[3] = b[i2 + 3];
9969371c9d4SSatish Balay           s[4] = b[i2 + 4];
9979371c9d4SSatish Balay           s[5] = b[i2 + 5];
9989371c9d4SSatish Balay           s[6] = b[i2 + 6];
999e48d15efSToby Isaac           while (nz--) {
1000e48d15efSToby Isaac             idx   = 7 * (*vi++);
10019371c9d4SSatish Balay             xw[0] = x[idx];
10029371c9d4SSatish Balay             xw[1] = x[1 + idx];
10039371c9d4SSatish Balay             xw[2] = x[2 + idx];
10049371c9d4SSatish Balay             xw[3] = x[3 + idx];
10059371c9d4SSatish Balay             xw[4] = x[4 + idx];
10069371c9d4SSatish Balay             xw[5] = x[5 + idx];
10079371c9d4SSatish Balay             xw[6] = x[6 + idx];
1008e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1009e48d15efSToby Isaac             v += 49;
1010e48d15efSToby Isaac           }
1011e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
10129371c9d4SSatish Balay           x[i2] += xw[0];
10139371c9d4SSatish Balay           x[i2 + 1] += xw[1];
10149371c9d4SSatish Balay           x[i2 + 2] += xw[2];
10159371c9d4SSatish Balay           x[i2 + 3] += xw[3];
10169371c9d4SSatish Balay           x[i2 + 4] += xw[4];
10179371c9d4SSatish Balay           x[i2 + 5] += xw[5];
10189371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1019e48d15efSToby Isaac           idiag += 49;
1020e48d15efSToby Isaac           i2 += 7;
1021e48d15efSToby Isaac         }
1022e48d15efSToby Isaac         break;
1023e48d15efSToby Isaac       default:
1024e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1025e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1026e48d15efSToby Isaac           vi = aj + ai[i];
1027e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1028e48d15efSToby Isaac 
10299566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1030e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1031e48d15efSToby Isaac           workt = work;
1032e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
10339566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1034e48d15efSToby Isaac             workt += bs;
1035e48d15efSToby Isaac           }
1036e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1037e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1038e48d15efSToby Isaac 
1039e48d15efSToby Isaac           idiag += bs2;
1040e48d15efSToby Isaac           i2 += bs;
1041e48d15efSToby Isaac         }
1042e48d15efSToby Isaac         break;
1043e48d15efSToby Isaac       }
10449566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1045e48d15efSToby Isaac     }
1046e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1047e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
1048e48d15efSToby Isaac       i2    = bs * (m - 1);
1049e48d15efSToby Isaac       switch (bs) {
1050e48d15efSToby Isaac       case 1:
1051e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1052e48d15efSToby Isaac           v    = aa + ai[i];
1053e48d15efSToby Isaac           vi   = aj + ai[i];
1054e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
1055e48d15efSToby Isaac           s[0] = b[i2];
1056e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1057e48d15efSToby Isaac             xw[0] = x[vi[j]];
1058e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1059e48d15efSToby Isaac           }
1060e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1061e48d15efSToby Isaac           x[i2] += xw[0];
1062e48d15efSToby Isaac           idiag -= 1;
1063e48d15efSToby Isaac           i2 -= 1;
1064e48d15efSToby Isaac         }
1065e48d15efSToby Isaac         break;
1066e48d15efSToby Isaac       case 2:
1067e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1068e48d15efSToby Isaac           v    = aa + 4 * ai[i];
1069e48d15efSToby Isaac           vi   = aj + ai[i];
1070e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10719371c9d4SSatish Balay           s[0] = b[i2];
10729371c9d4SSatish Balay           s[1] = b[i2 + 1];
1073e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1074e48d15efSToby Isaac             idx   = 2 * vi[j];
1075e48d15efSToby Isaac             it    = 4 * j;
10769371c9d4SSatish Balay             xw[0] = x[idx];
10779371c9d4SSatish Balay             xw[1] = x[1 + idx];
1078e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1079e48d15efSToby Isaac           }
1080e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
10819371c9d4SSatish Balay           x[i2] += xw[0];
10829371c9d4SSatish Balay           x[i2 + 1] += xw[1];
1083e48d15efSToby Isaac           idiag -= 4;
1084e48d15efSToby Isaac           i2 -= 2;
1085e48d15efSToby Isaac         }
1086e48d15efSToby Isaac         break;
1087e48d15efSToby Isaac       case 3:
1088e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1089e48d15efSToby Isaac           v    = aa + 9 * ai[i];
1090e48d15efSToby Isaac           vi   = aj + ai[i];
1091e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10929371c9d4SSatish Balay           s[0] = b[i2];
10939371c9d4SSatish Balay           s[1] = b[i2 + 1];
10949371c9d4SSatish Balay           s[2] = b[i2 + 2];
1095e48d15efSToby Isaac           while (nz--) {
1096e48d15efSToby Isaac             idx   = 3 * (*vi++);
10979371c9d4SSatish Balay             xw[0] = x[idx];
10989371c9d4SSatish Balay             xw[1] = x[1 + idx];
10999371c9d4SSatish Balay             xw[2] = x[2 + idx];
1100e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1101e48d15efSToby Isaac             v += 9;
1102e48d15efSToby Isaac           }
1103e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
11049371c9d4SSatish Balay           x[i2] += xw[0];
11059371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11069371c9d4SSatish Balay           x[i2 + 2] += xw[2];
1107e48d15efSToby Isaac           idiag -= 9;
1108e48d15efSToby Isaac           i2 -= 3;
1109e48d15efSToby Isaac         }
1110e48d15efSToby Isaac         break;
1111e48d15efSToby Isaac       case 4:
1112e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1113e48d15efSToby Isaac           v    = aa + 16 * ai[i];
1114e48d15efSToby Isaac           vi   = aj + ai[i];
1115e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11169371c9d4SSatish Balay           s[0] = b[i2];
11179371c9d4SSatish Balay           s[1] = b[i2 + 1];
11189371c9d4SSatish Balay           s[2] = b[i2 + 2];
11199371c9d4SSatish Balay           s[3] = b[i2 + 3];
1120e48d15efSToby Isaac           while (nz--) {
1121e48d15efSToby Isaac             idx   = 4 * (*vi++);
11229371c9d4SSatish Balay             xw[0] = x[idx];
11239371c9d4SSatish Balay             xw[1] = x[1 + idx];
11249371c9d4SSatish Balay             xw[2] = x[2 + idx];
11259371c9d4SSatish Balay             xw[3] = x[3 + idx];
1126e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1127e48d15efSToby Isaac             v += 16;
1128e48d15efSToby Isaac           }
1129e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
11309371c9d4SSatish Balay           x[i2] += xw[0];
11319371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11329371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11339371c9d4SSatish Balay           x[i2 + 3] += xw[3];
1134e48d15efSToby Isaac           idiag -= 16;
1135e48d15efSToby Isaac           i2 -= 4;
1136e48d15efSToby Isaac         }
1137e48d15efSToby Isaac         break;
1138e48d15efSToby Isaac       case 5:
1139e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1140e48d15efSToby Isaac           v    = aa + 25 * ai[i];
1141e48d15efSToby Isaac           vi   = aj + ai[i];
1142e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11439371c9d4SSatish Balay           s[0] = b[i2];
11449371c9d4SSatish Balay           s[1] = b[i2 + 1];
11459371c9d4SSatish Balay           s[2] = b[i2 + 2];
11469371c9d4SSatish Balay           s[3] = b[i2 + 3];
11479371c9d4SSatish Balay           s[4] = b[i2 + 4];
1148e48d15efSToby Isaac           while (nz--) {
1149e48d15efSToby Isaac             idx   = 5 * (*vi++);
11509371c9d4SSatish Balay             xw[0] = x[idx];
11519371c9d4SSatish Balay             xw[1] = x[1 + idx];
11529371c9d4SSatish Balay             xw[2] = x[2 + idx];
11539371c9d4SSatish Balay             xw[3] = x[3 + idx];
11549371c9d4SSatish Balay             xw[4] = x[4 + idx];
1155e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1156e48d15efSToby Isaac             v += 25;
1157e48d15efSToby Isaac           }
1158e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
11599371c9d4SSatish Balay           x[i2] += xw[0];
11609371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11619371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11629371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11639371c9d4SSatish Balay           x[i2 + 4] += xw[4];
1164e48d15efSToby Isaac           idiag -= 25;
1165e48d15efSToby Isaac           i2 -= 5;
1166e48d15efSToby Isaac         }
1167e48d15efSToby Isaac         break;
1168e48d15efSToby Isaac       case 6:
1169e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1170e48d15efSToby Isaac           v    = aa + 36 * ai[i];
1171e48d15efSToby Isaac           vi   = aj + ai[i];
1172e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11739371c9d4SSatish Balay           s[0] = b[i2];
11749371c9d4SSatish Balay           s[1] = b[i2 + 1];
11759371c9d4SSatish Balay           s[2] = b[i2 + 2];
11769371c9d4SSatish Balay           s[3] = b[i2 + 3];
11779371c9d4SSatish Balay           s[4] = b[i2 + 4];
11789371c9d4SSatish Balay           s[5] = b[i2 + 5];
1179e48d15efSToby Isaac           while (nz--) {
1180e48d15efSToby Isaac             idx   = 6 * (*vi++);
11819371c9d4SSatish Balay             xw[0] = x[idx];
11829371c9d4SSatish Balay             xw[1] = x[1 + idx];
11839371c9d4SSatish Balay             xw[2] = x[2 + idx];
11849371c9d4SSatish Balay             xw[3] = x[3 + idx];
11859371c9d4SSatish Balay             xw[4] = x[4 + idx];
11869371c9d4SSatish Balay             xw[5] = x[5 + idx];
1187e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1188e48d15efSToby Isaac             v += 36;
1189e48d15efSToby Isaac           }
1190e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
11919371c9d4SSatish Balay           x[i2] += xw[0];
11929371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11939371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11949371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11959371c9d4SSatish Balay           x[i2 + 4] += xw[4];
11969371c9d4SSatish Balay           x[i2 + 5] += xw[5];
1197e48d15efSToby Isaac           idiag -= 36;
1198e48d15efSToby Isaac           i2 -= 6;
1199e48d15efSToby Isaac         }
1200e48d15efSToby Isaac         break;
1201e48d15efSToby Isaac       case 7:
1202e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1203e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1204e48d15efSToby Isaac           vi   = aj + ai[i];
1205e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
12069371c9d4SSatish Balay           s[0] = b[i2];
12079371c9d4SSatish Balay           s[1] = b[i2 + 1];
12089371c9d4SSatish Balay           s[2] = b[i2 + 2];
12099371c9d4SSatish Balay           s[3] = b[i2 + 3];
12109371c9d4SSatish Balay           s[4] = b[i2 + 4];
12119371c9d4SSatish Balay           s[5] = b[i2 + 5];
12129371c9d4SSatish Balay           s[6] = b[i2 + 6];
1213e48d15efSToby Isaac           while (nz--) {
1214e48d15efSToby Isaac             idx   = 7 * (*vi++);
12159371c9d4SSatish Balay             xw[0] = x[idx];
12169371c9d4SSatish Balay             xw[1] = x[1 + idx];
12179371c9d4SSatish Balay             xw[2] = x[2 + idx];
12189371c9d4SSatish Balay             xw[3] = x[3 + idx];
12199371c9d4SSatish Balay             xw[4] = x[4 + idx];
12209371c9d4SSatish Balay             xw[5] = x[5 + idx];
12219371c9d4SSatish Balay             xw[6] = x[6 + idx];
1222e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1223e48d15efSToby Isaac             v += 49;
1224e48d15efSToby Isaac           }
1225e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
12269371c9d4SSatish Balay           x[i2] += xw[0];
12279371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12289371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12299371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12309371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12319371c9d4SSatish Balay           x[i2 + 5] += xw[5];
12329371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1233e48d15efSToby Isaac           idiag -= 49;
1234e48d15efSToby Isaac           i2 -= 7;
1235e48d15efSToby Isaac         }
1236e48d15efSToby Isaac         break;
1237e48d15efSToby Isaac       default:
1238e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1239e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1240e48d15efSToby Isaac           vi = aj + ai[i];
1241e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1242e48d15efSToby Isaac 
12439566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1244e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1245e48d15efSToby Isaac           workt = work;
1246e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
12479566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1248e48d15efSToby Isaac             workt += bs;
1249e48d15efSToby Isaac           }
1250e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1251e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1252e48d15efSToby Isaac 
1253e48d15efSToby Isaac           idiag -= bs2;
1254e48d15efSToby Isaac           i2 -= bs;
1255e48d15efSToby Isaac         }
1256e48d15efSToby Isaac         break;
1257e48d15efSToby Isaac       }
12589566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1259e48d15efSToby Isaac     }
1260e48d15efSToby Isaac   }
12619566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1264de80f912SBarry Smith }
1265de80f912SBarry Smith 
1266af674e45SBarry Smith /*
126781824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1268af674e45SBarry Smith */
1269af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1270af674e45SBarry Smith   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1271af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1272af674e45SBarry Smith   #define matsetvaluesblocked4_ matsetvaluesblocked4
1273af674e45SBarry Smith #endif
1274af674e45SBarry Smith 
1275d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1276d71ae5a4SJacob Faibussowitsch {
1277af674e45SBarry Smith   Mat                A = *AA;
1278af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1279c1ac3661SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1280c1ac3661SBarry Smith   PetscInt          *ai = a->i, *ailen = a->ilen;
128117ec6a02SBarry Smith   PetscInt          *aj = a->j, stepval, lastcol = -1;
1282f15d580aSBarry Smith   const PetscScalar *value = v;
12834bb09213Spetsc   MatScalar         *ap, *aa = a->a, *bap;
1284af674e45SBarry Smith 
1285af674e45SBarry Smith   PetscFunctionBegin;
1286ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1287af674e45SBarry Smith   stepval = (n - 1) * 4;
1288af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
1289af674e45SBarry Smith     row  = im[k];
1290af674e45SBarry Smith     rp   = aj + ai[row];
1291af674e45SBarry Smith     ap   = aa + 16 * ai[row];
1292af674e45SBarry Smith     nrow = ailen[row];
1293af674e45SBarry Smith     low  = 0;
129417ec6a02SBarry Smith     high = nrow;
1295af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
1296af674e45SBarry Smith       col = in[l];
1297db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1298db4deed7SKarl Rupp       else high = nrow;
129917ec6a02SBarry Smith       lastcol = col;
13001e3347e8SBarry Smith       value   = v + k * (stepval + 4 + l) * 4;
1301af674e45SBarry Smith       while (high - low > 7) {
1302af674e45SBarry Smith         t = (low + high) / 2;
1303af674e45SBarry Smith         if (rp[t] > col) high = t;
1304af674e45SBarry Smith         else low = t;
1305af674e45SBarry Smith       }
1306af674e45SBarry Smith       for (i = low; i < high; i++) {
1307af674e45SBarry Smith         if (rp[i] > col) break;
1308af674e45SBarry Smith         if (rp[i] == col) {
1309af674e45SBarry Smith           bap = ap + 16 * i;
1310af674e45SBarry Smith           for (ii = 0; ii < 4; ii++, value += stepval) {
1311ad540459SPierre Jolivet             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1312af674e45SBarry Smith           }
1313af674e45SBarry Smith           goto noinsert2;
1314af674e45SBarry Smith         }
1315af674e45SBarry Smith       }
1316af674e45SBarry Smith       N = nrow++ - 1;
131717ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
1318af674e45SBarry Smith       /* shift up all the later entries in this row */
1319af674e45SBarry Smith       for (ii = N; ii >= i; ii--) {
1320af674e45SBarry Smith         rp[ii + 1] = rp[ii];
13219566063dSJacob Faibussowitsch         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1322af674e45SBarry Smith       }
132348a46eb9SPierre Jolivet       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1324af674e45SBarry Smith       rp[i] = col;
1325af674e45SBarry Smith       bap   = ap + 16 * i;
1326af674e45SBarry Smith       for (ii = 0; ii < 4; ii++, value += stepval) {
1327ad540459SPierre Jolivet         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1328af674e45SBarry Smith       }
1329af674e45SBarry Smith     noinsert2:;
1330af674e45SBarry Smith       low = i;
1331af674e45SBarry Smith     }
1332af674e45SBarry Smith     ailen[row] = nrow;
1333af674e45SBarry Smith   }
1334be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1335af674e45SBarry Smith }
1336af674e45SBarry Smith 
1337af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1338af674e45SBarry Smith   #define matsetvalues4_ MATSETVALUES4
1339af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1340af674e45SBarry Smith   #define matsetvalues4_ matsetvalues4
1341af674e45SBarry Smith #endif
1342af674e45SBarry Smith 
1343d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1344d71ae5a4SJacob Faibussowitsch {
1345af674e45SBarry Smith   Mat          A = *AA;
1346af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1347580bdb30SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1348c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
1349c1ac3661SBarry Smith   PetscInt    *aj = a->j, brow, bcol;
135017ec6a02SBarry Smith   PetscInt     ridx, cidx, lastcol = -1;
1351af674e45SBarry Smith   MatScalar   *ap, value, *aa      = a->a, *bap;
1352af674e45SBarry Smith 
1353af674e45SBarry Smith   PetscFunctionBegin;
1354af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
13559371c9d4SSatish Balay     row  = im[k];
13569371c9d4SSatish Balay     brow = row / 4;
1357af674e45SBarry Smith     rp   = aj + ai[brow];
1358af674e45SBarry Smith     ap   = aa + 16 * ai[brow];
1359af674e45SBarry Smith     nrow = ailen[brow];
1360af674e45SBarry Smith     low  = 0;
136117ec6a02SBarry Smith     high = nrow;
1362af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
13639371c9d4SSatish Balay       col   = in[l];
13649371c9d4SSatish Balay       bcol  = col / 4;
13659371c9d4SSatish Balay       ridx  = row % 4;
13669371c9d4SSatish Balay       cidx  = col % 4;
1367af674e45SBarry Smith       value = v[l + k * n];
1368db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1369db4deed7SKarl Rupp       else high = nrow;
137017ec6a02SBarry Smith       lastcol = col;
1371af674e45SBarry Smith       while (high - low > 7) {
1372af674e45SBarry Smith         t = (low + high) / 2;
1373af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1374af674e45SBarry Smith         else low = t;
1375af674e45SBarry Smith       }
1376af674e45SBarry Smith       for (i = low; i < high; i++) {
1377af674e45SBarry Smith         if (rp[i] > bcol) break;
1378af674e45SBarry Smith         if (rp[i] == bcol) {
1379af674e45SBarry Smith           bap = ap + 16 * i + 4 * cidx + ridx;
1380af674e45SBarry Smith           *bap += value;
1381af674e45SBarry Smith           goto noinsert1;
1382af674e45SBarry Smith         }
1383af674e45SBarry Smith       }
1384af674e45SBarry Smith       N = nrow++ - 1;
138517ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1386af674e45SBarry Smith       /* shift up all the later entries in this row */
13879566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
13889566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
13899566063dSJacob Faibussowitsch       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1390af674e45SBarry Smith       rp[i]                        = bcol;
1391af674e45SBarry Smith       ap[16 * i + 4 * cidx + ridx] = value;
1392af674e45SBarry Smith     noinsert1:;
1393af674e45SBarry Smith       low = i;
1394af674e45SBarry Smith     }
1395af674e45SBarry Smith     ailen[brow] = nrow;
1396af674e45SBarry Smith   }
1397be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1398af674e45SBarry Smith }
1399af674e45SBarry Smith 
1400be5855fcSBarry Smith /*
1401be5855fcSBarry Smith      Checks for missing diagonals
1402be5855fcSBarry Smith */
1403d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1404d71ae5a4SJacob Faibussowitsch {
1405be5855fcSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14067734d3b5SMatthew G. Knepley   PetscInt    *diag, *ii = a->i, i;
1407be5855fcSBarry Smith 
1408be5855fcSBarry Smith   PetscFunctionBegin;
14099566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
14102af78befSBarry Smith   *missing = PETSC_FALSE;
14117734d3b5SMatthew G. Knepley   if (A->rmap->n > 0 && !ii) {
14122efa7f71SHong Zhang     *missing = PETSC_TRUE;
14132efa7f71SHong Zhang     if (d) *d = 0;
14149566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
14152efa7f71SHong Zhang   } else {
141601445905SHong Zhang     PetscInt n;
141701445905SHong Zhang     n    = PetscMin(a->mbs, a->nbs);
1418883fce79SBarry Smith     diag = a->diag;
141901445905SHong Zhang     for (i = 0; i < n; i++) {
14207734d3b5SMatthew G. Knepley       if (diag[i] >= ii[i + 1]) {
14212af78befSBarry Smith         *missing = PETSC_TRUE;
14222af78befSBarry Smith         if (d) *d = i;
14239566063dSJacob Faibussowitsch         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1424358d2f5dSShri Abhyankar         break;
14252efa7f71SHong Zhang       }
1426be5855fcSBarry Smith     }
1427be5855fcSBarry Smith   }
14283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1429be5855fcSBarry Smith }
1430be5855fcSBarry Smith 
1431d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1432d71ae5a4SJacob Faibussowitsch {
1433de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
143409f38230SBarry Smith   PetscInt     i, j, m = a->mbs;
1435de6a44a3SBarry Smith 
14363a40ed3dSBarry Smith   PetscFunctionBegin;
143709f38230SBarry Smith   if (!a->diag) {
14389566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(m, &a->diag));
14394fd072dbSBarry Smith     a->free_diag = PETSC_TRUE;
144009f38230SBarry Smith   }
14417fc0212eSBarry Smith   for (i = 0; i < m; i++) {
144209f38230SBarry Smith     a->diag[i] = a->i[i + 1];
1443de6a44a3SBarry Smith     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1444de6a44a3SBarry Smith       if (a->j[j] == i) {
144509f38230SBarry Smith         a->diag[i] = j;
1446de6a44a3SBarry Smith         break;
1447de6a44a3SBarry Smith       }
1448de6a44a3SBarry Smith     }
1449de6a44a3SBarry Smith   }
14503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1451de6a44a3SBarry Smith }
14522593348eSBarry Smith 
1453d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1454d71ae5a4SJacob Faibussowitsch {
14553b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14561a83f524SJed Brown   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
14571a83f524SJed Brown   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
14583b2fbd54SBarry Smith 
14593a40ed3dSBarry Smith   PetscFunctionBegin;
14603b2fbd54SBarry Smith   *nn = n;
14613ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
14623b2fbd54SBarry Smith   if (symmetric) {
14639566063dSJacob Faibussowitsch     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1464553b3c51SBarry Smith     nz = tia[n];
14653b2fbd54SBarry Smith   } else {
14669371c9d4SSatish Balay     tia = a->i;
14679371c9d4SSatish Balay     tja = a->j;
14683b2fbd54SBarry Smith   }
14693b2fbd54SBarry Smith 
1470ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1471ecc77c7aSBarry Smith     (*nn) *= bs;
14728f7157efSSatish Balay     /* malloc & create the natural set of indices */
14739566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((n + 1) * bs, ia));
14749985e31cSBarry Smith     if (n) {
14752462f5fdSStefano Zampini       (*ia)[0] = oshift;
1476ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
14779985e31cSBarry Smith     }
1478ecc77c7aSBarry Smith 
1479ecc77c7aSBarry Smith     for (i = 1; i < n; i++) {
1480ecc77c7aSBarry Smith       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1481ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
14828f7157efSSatish Balay     }
1483ad540459SPierre Jolivet     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1484ecc77c7aSBarry Smith 
14851a83f524SJed Brown     if (inja) {
14869566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz * bs * bs, ja));
14879985e31cSBarry Smith       cnt = 0;
14889985e31cSBarry Smith       for (i = 0; i < n; i++) {
14899985e31cSBarry Smith         for (j = 0; j < bs; j++) {
14909985e31cSBarry Smith           for (k = tia[i]; k < tia[i + 1]; k++) {
1491ad540459SPierre Jolivet             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
14929985e31cSBarry Smith           }
14939985e31cSBarry Smith         }
14949985e31cSBarry Smith       }
14959985e31cSBarry Smith     }
14969985e31cSBarry Smith 
14978f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
14989566063dSJacob Faibussowitsch       PetscCall(PetscFree(tia));
14999566063dSJacob Faibussowitsch       PetscCall(PetscFree(tja));
15008f7157efSSatish Balay     }
1501f6d58c54SBarry Smith   } else if (oshift == 1) {
1502715a17b5SBarry Smith     if (symmetric) {
1503a2ea699eSBarry Smith       nz = tia[A->rmap->n / bs];
1504715a17b5SBarry Smith       /*  add 1 to i and j indices */
1505715a17b5SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1506715a17b5SBarry Smith       *ia = tia;
1507715a17b5SBarry Smith       if (ja) {
1508715a17b5SBarry Smith         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1509715a17b5SBarry Smith         *ja = tja;
1510715a17b5SBarry Smith       }
1511715a17b5SBarry Smith     } else {
1512a2ea699eSBarry Smith       nz = a->i[A->rmap->n / bs];
1513f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
15149566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1515f6d58c54SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1516f6d58c54SBarry Smith       if (ja) {
15179566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(nz, ja));
1518f6d58c54SBarry Smith         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1519f6d58c54SBarry Smith       }
1520715a17b5SBarry Smith     }
15218f7157efSSatish Balay   } else {
15228f7157efSSatish Balay     *ia = tia;
1523ecc77c7aSBarry Smith     if (ja) *ja = tja;
15248f7157efSSatish Balay   }
15253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15263b2fbd54SBarry Smith }
15273b2fbd54SBarry Smith 
1528d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1529d71ae5a4SJacob Faibussowitsch {
15303a40ed3dSBarry Smith   PetscFunctionBegin;
15313ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1532715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
15339566063dSJacob Faibussowitsch     PetscCall(PetscFree(*ia));
15349566063dSJacob Faibussowitsch     if (ja) PetscCall(PetscFree(*ja));
15353b2fbd54SBarry Smith   }
15363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15373b2fbd54SBarry Smith }
15383b2fbd54SBarry Smith 
1539d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1540d71ae5a4SJacob Faibussowitsch {
15412d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15422d61bbb3SSatish Balay 
1543433994e6SBarry Smith   PetscFunctionBegin;
1544aa482453SBarry Smith #if defined(PETSC_USE_LOG)
15453ba16761SJacob Faibussowitsch   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
15462d61bbb3SSatish Balay #endif
15479566063dSJacob Faibussowitsch   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
15489566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
15499566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
15509566063dSJacob Faibussowitsch   if (a->free_diag) PetscCall(PetscFree(a->diag));
15519566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->idiag));
15529566063dSJacob Faibussowitsch   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
15539566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->solve_work));
15549566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->mult_work));
15559566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_workt));
15569566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_work));
15579566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
15589566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->saved_values));
15599566063dSJacob Faibussowitsch   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1560c4319e64SHong Zhang 
15619566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->sbaijMat));
15629566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->parent));
15639566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1564901853e0SKris Buschelman 
15659566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
15669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
15679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
15689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
15699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
15709566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
15719566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
15729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
15739566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
15749566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
15759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
15769566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
15777ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
15789566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
15797ea3e4caSstefano_zampini #endif
15809566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
15812e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
15823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15832d61bbb3SSatish Balay }
15842d61bbb3SSatish Balay 
1585d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1586d71ae5a4SJacob Faibussowitsch {
15872d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15882d61bbb3SSatish Balay 
15892d61bbb3SSatish Balay   PetscFunctionBegin;
1590aa275fccSKris Buschelman   switch (op) {
1591d71ae5a4SJacob Faibussowitsch   case MAT_ROW_ORIENTED:
1592d71ae5a4SJacob Faibussowitsch     a->roworiented = flg;
1593d71ae5a4SJacob Faibussowitsch     break;
1594d71ae5a4SJacob Faibussowitsch   case MAT_KEEP_NONZERO_PATTERN:
1595d71ae5a4SJacob Faibussowitsch     a->keepnonzeropattern = flg;
1596d71ae5a4SJacob Faibussowitsch     break;
1597d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATIONS:
1598d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? 0 : 1);
1599d71ae5a4SJacob Faibussowitsch     break;
1600d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATION_ERR:
1601d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -1 : 0);
1602d71ae5a4SJacob Faibussowitsch     break;
1603d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1604d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -2 : 0);
1605d71ae5a4SJacob Faibussowitsch     break;
1606d71ae5a4SJacob Faibussowitsch   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1607d71ae5a4SJacob Faibussowitsch     a->nounused = (flg ? -1 : 0);
1608d71ae5a4SJacob Faibussowitsch     break;
16098c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1610aa275fccSKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
1611aa275fccSKris Buschelman   case MAT_USE_HASH_TABLE:
1612d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
1613d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1614d71ae5a4SJacob Faibussowitsch     break;
16155021d80fSJed Brown   case MAT_SPD:
161677e54ba9SKris Buschelman   case MAT_SYMMETRIC:
161777e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
16189a4540c5SBarry Smith   case MAT_HERMITIAN:
16199a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1620b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1621c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1622672ba085SHong Zhang   case MAT_STRUCTURE_ONLY:
1623b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
1624b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
162577e54ba9SKris Buschelman     break;
1626d71ae5a4SJacob Faibussowitsch   default:
1627d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
16282d61bbb3SSatish Balay   }
16293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16302d61bbb3SSatish Balay }
16312d61bbb3SSatish Balay 
163252768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
1633d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1634d71ae5a4SJacob Faibussowitsch {
163552768537SHong Zhang   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
163652768537SHong Zhang   MatScalar   *aa_i;
163787828ca2SBarry Smith   PetscScalar *v_i;
16382d61bbb3SSatish Balay 
16392d61bbb3SSatish Balay   PetscFunctionBegin;
1640d0f46423SBarry Smith   bs  = A->rmap->bs;
164152768537SHong Zhang   bs2 = bs * bs;
16425f80ce2aSJacob Faibussowitsch   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
16432d61bbb3SSatish Balay 
16442d61bbb3SSatish Balay   bn  = row / bs; /* Block number */
16452d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
16462d61bbb3SSatish Balay   M   = ai[bn + 1] - ai[bn];
16472d61bbb3SSatish Balay   *nz = bs * M;
16482d61bbb3SSatish Balay 
16492d61bbb3SSatish Balay   if (v) {
1650f4259b30SLisandro Dalcin     *v = NULL;
16512d61bbb3SSatish Balay     if (*nz) {
16529566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, v));
16532d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16542d61bbb3SSatish Balay         v_i  = *v + i * bs;
16552d61bbb3SSatish Balay         aa_i = aa + bs2 * (ai[bn] + i);
165626fbe8dcSKarl Rupp         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
16572d61bbb3SSatish Balay       }
16582d61bbb3SSatish Balay     }
16592d61bbb3SSatish Balay   }
16602d61bbb3SSatish Balay 
16612d61bbb3SSatish Balay   if (idx) {
1662f4259b30SLisandro Dalcin     *idx = NULL;
16632d61bbb3SSatish Balay     if (*nz) {
16649566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, idx));
16652d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16662d61bbb3SSatish Balay         idx_i = *idx + i * bs;
16672d61bbb3SSatish Balay         itmp  = bs * aj[ai[bn] + i];
166826fbe8dcSKarl Rupp         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
16692d61bbb3SSatish Balay       }
16702d61bbb3SSatish Balay     }
16712d61bbb3SSatish Balay   }
16723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16732d61bbb3SSatish Balay }
16742d61bbb3SSatish Balay 
1675d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1676d71ae5a4SJacob Faibussowitsch {
167752768537SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
167852768537SHong Zhang 
167952768537SHong Zhang   PetscFunctionBegin;
16809566063dSJacob Faibussowitsch   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
16813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
168252768537SHong Zhang }
168352768537SHong Zhang 
1684d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1685d71ae5a4SJacob Faibussowitsch {
16862d61bbb3SSatish Balay   PetscFunctionBegin;
1687cb4a9cd9SHong Zhang   if (nz) *nz = 0;
16889566063dSJacob Faibussowitsch   if (idx) PetscCall(PetscFree(*idx));
16899566063dSJacob Faibussowitsch   if (v) PetscCall(PetscFree(*v));
16903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16912d61bbb3SSatish Balay }
16922d61bbb3SSatish Balay 
1693d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1694d71ae5a4SJacob Faibussowitsch {
169520e84f26SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
16962d61bbb3SSatish Balay   Mat          C;
169720e84f26SHong Zhang   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
169820e84f26SHong Zhang   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
169920e84f26SHong Zhang   MatScalar   *ata, *aa = a->a;
17002d61bbb3SSatish Balay 
17012d61bbb3SSatish Balay   PetscFunctionBegin;
17027fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
17039566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1704cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
170520e84f26SHong Zhang     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
17062d61bbb3SSatish Balay 
17079566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
17089566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
17099566063dSJacob Faibussowitsch     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
17109566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
171120e84f26SHong Zhang 
171220e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
171320e84f26SHong Zhang     ati = at->i;
171420e84f26SHong Zhang     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1715fc4dec0aSBarry Smith   } else {
1716fc4dec0aSBarry Smith     C   = *B;
171720e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
171820e84f26SHong Zhang     ati = at->i;
1719fc4dec0aSBarry Smith   }
1720fc4dec0aSBarry Smith 
172120e84f26SHong Zhang   atj = at->j;
172220e84f26SHong Zhang   ata = at->a;
172320e84f26SHong Zhang 
172420e84f26SHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
17259566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(atfill, ati, nbs));
172620e84f26SHong Zhang 
172720e84f26SHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
17282d61bbb3SSatish Balay   for (i = 0; i < mbs; i++) {
172920e84f26SHong Zhang     anzj = ai[i + 1] - ai[i];
173020e84f26SHong Zhang     for (j = 0; j < anzj; j++) {
173120e84f26SHong Zhang       atj[atfill[*aj]] = i;
173220e84f26SHong Zhang       for (kr = 0; kr < bs; kr++) {
1733ad540459SPierre Jolivet         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
17342d61bbb3SSatish Balay       }
173520e84f26SHong Zhang       atfill[*aj++] += 1;
173620e84f26SHong Zhang     }
173720e84f26SHong Zhang   }
17389566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
17399566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
17402d61bbb3SSatish Balay 
174120e84f26SHong Zhang   /* Clean up temporary space and complete requests. */
17429566063dSJacob Faibussowitsch   PetscCall(PetscFree(atfill));
174320e84f26SHong Zhang 
1744cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
17459566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
17462d61bbb3SSatish Balay     *B = C;
17472d61bbb3SSatish Balay   } else {
17489566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &C));
17492d61bbb3SSatish Balay   }
17503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17512d61bbb3SSatish Balay }
17522d61bbb3SSatish Balay 
1753*ff6a9541SJacob Faibussowitsch static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1754d71ae5a4SJacob Faibussowitsch {
1755453d3561SHong Zhang   Mat Btrans;
1756453d3561SHong Zhang 
1757453d3561SHong Zhang   PetscFunctionBegin;
1758453d3561SHong Zhang   *f = PETSC_FALSE;
1759acd337a6SBarry Smith   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
17609566063dSJacob Faibussowitsch   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
17619566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Btrans));
17623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1763453d3561SHong Zhang }
1764453d3561SHong Zhang 
1765618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1766d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1767d71ae5a4SJacob Faibussowitsch {
1768b51a4376SLisandro Dalcin   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1769b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1770b51a4376SLisandro Dalcin   PetscInt    *rowlens, *colidxs;
1771b51a4376SLisandro Dalcin   PetscScalar *matvals;
17722593348eSBarry Smith 
17733a40ed3dSBarry Smith   PetscFunctionBegin;
17749566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
17753b2fbd54SBarry Smith 
1776b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1777b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1778b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1779b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1780b51a4376SLisandro Dalcin   nz = bs * bs * A->nz;
17812593348eSBarry Smith 
1782b51a4376SLisandro Dalcin   /* write matrix header */
1783b51a4376SLisandro Dalcin   header[0] = MAT_FILE_CLASSID;
17849371c9d4SSatish Balay   header[1] = M;
17859371c9d4SSatish Balay   header[2] = N;
17869371c9d4SSatish Balay   header[3] = nz;
17879566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
17882593348eSBarry Smith 
1789b51a4376SLisandro Dalcin   /* store row lengths */
17909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rowlens));
1791b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
17929371c9d4SSatish Balay     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
17939566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
17949566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
1795b51a4376SLisandro Dalcin 
1796b51a4376SLisandro Dalcin   /* store column indices  */
17979566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &colidxs));
1798b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1799b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1800b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
18019371c9d4SSatish Balay         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
18025f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
18039566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
18049566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
18052593348eSBarry Smith 
18062593348eSBarry Smith   /* store nonzero values */
18079566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &matvals));
1808b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1809b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1810b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
18119371c9d4SSatish Balay         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
18125f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
18139566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
18149566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
1815ce6f0cecSBarry Smith 
1816b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
18179566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
18183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18192593348eSBarry Smith }
18202593348eSBarry Smith 
1821d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1822d71ae5a4SJacob Faibussowitsch {
18237dc0baabSHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
18247dc0baabSHong Zhang   PetscInt     i, bs = A->rmap->bs, k;
18257dc0baabSHong Zhang 
18267dc0baabSHong Zhang   PetscFunctionBegin;
18279566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
18287dc0baabSHong Zhang   for (i = 0; i < a->mbs; i++) {
18299566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
183048a46eb9SPierre Jolivet     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
18319566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18327dc0baabSHong Zhang   }
18339566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18357dc0baabSHong Zhang }
18367dc0baabSHong Zhang 
1837d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1838d71ae5a4SJacob Faibussowitsch {
1839b6490206SBarry Smith   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1840d0f46423SBarry Smith   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1841f3ef73ceSBarry Smith   PetscViewerFormat format;
18422593348eSBarry Smith 
18433a40ed3dSBarry Smith   PetscFunctionBegin;
18447dc0baabSHong Zhang   if (A->structure_only) {
18459566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
18463ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
18477dc0baabSHong Zhang   }
18487dc0baabSHong Zhang 
18499566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
1850456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
18519566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1852fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1853ade3a672SBarry Smith     const char *matname;
1854bcd9e38bSBarry Smith     Mat         aij;
18559566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
18569566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
18579566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
18589566063dSJacob Faibussowitsch     PetscCall(MatView(aij, viewer));
18599566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&aij));
186004929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
18613ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1862fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
18639566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
186444cd7ae7SLois Curfman McInnes     for (i = 0; i < a->mbs; i++) {
186544cd7ae7SLois Curfman McInnes       for (j = 0; j < bs; j++) {
18669566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
186744cd7ae7SLois Curfman McInnes         for (k = a->i[i]; k < a->i[i + 1]; k++) {
186844cd7ae7SLois Curfman McInnes           for (l = 0; l < bs; l++) {
1869aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18700e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18719371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18720e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18739371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18740e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18759566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
18760ef38995SBarry Smith             }
187744cd7ae7SLois Curfman McInnes #else
187848a46eb9SPierre Jolivet             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
187944cd7ae7SLois Curfman McInnes #endif
188044cd7ae7SLois Curfman McInnes           }
188144cd7ae7SLois Curfman McInnes         }
18829566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
188344cd7ae7SLois Curfman McInnes       }
188444cd7ae7SLois Curfman McInnes     }
18859566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18860ef38995SBarry Smith   } else {
18879566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1888b6490206SBarry Smith     for (i = 0; i < a->mbs; i++) {
1889b6490206SBarry Smith       for (j = 0; j < bs; j++) {
18909566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1891b6490206SBarry Smith         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1892b6490206SBarry Smith           for (l = 0; l < bs; l++) {
1893aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18940e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
18959371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18960e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
18979371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18980ef38995SBarry Smith             } else {
18999566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
190088685aaeSLois Curfman McInnes             }
190188685aaeSLois Curfman McInnes #else
19029566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
190388685aaeSLois Curfman McInnes #endif
19042593348eSBarry Smith           }
19052593348eSBarry Smith         }
19069566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
19072593348eSBarry Smith       }
19082593348eSBarry Smith     }
19099566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1910b6490206SBarry Smith   }
19119566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
19123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19132593348eSBarry Smith }
19142593348eSBarry Smith 
19159804daf3SBarry Smith #include <petscdraw.h>
1916d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1917d71ae5a4SJacob Faibussowitsch {
191877ed5343SBarry Smith   Mat               A = (Mat)Aa;
19193270192aSSatish Balay   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1920d0f46423SBarry Smith   PetscInt          row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2;
19210e6d2581SBarry Smith   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
19223f1db9ecSBarry Smith   MatScalar        *aa;
1923b0a32e0cSBarry Smith   PetscViewer       viewer;
1924b3e7f47fSJed Brown   PetscViewerFormat format;
19253270192aSSatish Balay 
19263a40ed3dSBarry Smith   PetscFunctionBegin;
19279566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
19289566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
19299566063dSJacob Faibussowitsch   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
193077ed5343SBarry Smith 
19313270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1932b3e7f47fSJed Brown 
1933b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1934d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1935383922c3SLisandro Dalcin     /* Blue for negative, Cyan for zero and  Red for positive */
1936b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
19373270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19383270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19399371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19409371c9d4SSatish Balay         y_r = y_l + 1.0;
19419371c9d4SSatish Balay         x_l = a->j[j] * bs;
19429371c9d4SSatish Balay         x_r = x_l + 1.0;
19433270192aSSatish Balay         aa  = a->a + j * bs2;
19443270192aSSatish Balay         for (k = 0; k < bs; k++) {
19453270192aSSatish Balay           for (l = 0; l < bs; l++) {
19460e6d2581SBarry Smith             if (PetscRealPart(*aa++) >= 0.) continue;
19479566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19483270192aSSatish Balay           }
19493270192aSSatish Balay         }
19503270192aSSatish Balay       }
19513270192aSSatish Balay     }
1952b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
19533270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19543270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19559371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19569371c9d4SSatish Balay         y_r = y_l + 1.0;
19579371c9d4SSatish Balay         x_l = a->j[j] * bs;
19589371c9d4SSatish Balay         x_r = x_l + 1.0;
19593270192aSSatish Balay         aa  = a->a + j * bs2;
19603270192aSSatish Balay         for (k = 0; k < bs; k++) {
19613270192aSSatish Balay           for (l = 0; l < bs; l++) {
19620e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
19639566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19643270192aSSatish Balay           }
19653270192aSSatish Balay         }
19663270192aSSatish Balay       }
19673270192aSSatish Balay     }
1968b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
19693270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19703270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19719371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19729371c9d4SSatish Balay         y_r = y_l + 1.0;
19739371c9d4SSatish Balay         x_l = a->j[j] * bs;
19749371c9d4SSatish Balay         x_r = x_l + 1.0;
19753270192aSSatish Balay         aa  = a->a + j * bs2;
19763270192aSSatish Balay         for (k = 0; k < bs; k++) {
19773270192aSSatish Balay           for (l = 0; l < bs; l++) {
19780e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
19799566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19803270192aSSatish Balay           }
19813270192aSSatish Balay         }
19823270192aSSatish Balay       }
19833270192aSSatish Balay     }
1984d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
1985b3e7f47fSJed Brown   } else {
1986b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
1987b3e7f47fSJed Brown     /* first determine max of all nonzero values */
1988b05fc000SLisandro Dalcin     PetscReal minv = 0.0, maxv = 0.0;
1989b3e7f47fSJed Brown     PetscDraw popup;
1990b3e7f47fSJed Brown 
1991b3e7f47fSJed Brown     for (i = 0; i < a->nz * a->bs2; i++) {
1992b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1993b3e7f47fSJed Brown     }
1994383922c3SLisandro Dalcin     if (minv >= maxv) maxv = minv + PETSC_SMALL;
19959566063dSJacob Faibussowitsch     PetscCall(PetscDrawGetPopup(draw, &popup));
19969566063dSJacob Faibussowitsch     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1997383922c3SLisandro Dalcin 
1998d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1999b3e7f47fSJed Brown     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2000b3e7f47fSJed Brown       for (j = a->i[i]; j < a->i[i + 1]; j++) {
20019371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
20029371c9d4SSatish Balay         y_r = y_l + 1.0;
20039371c9d4SSatish Balay         x_l = a->j[j] * bs;
20049371c9d4SSatish Balay         x_r = x_l + 1.0;
2005b3e7f47fSJed Brown         aa  = a->a + j * bs2;
2006b3e7f47fSJed Brown         for (k = 0; k < bs; k++) {
2007b3e7f47fSJed Brown           for (l = 0; l < bs; l++) {
2008383922c3SLisandro Dalcin             MatScalar v = *aa++;
2009383922c3SLisandro Dalcin             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
20109566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2011b3e7f47fSJed Brown           }
2012b3e7f47fSJed Brown         }
2013b3e7f47fSJed Brown       }
2014b3e7f47fSJed Brown     }
2015d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
2016b3e7f47fSJed Brown   }
20173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
201877ed5343SBarry Smith }
20193270192aSSatish Balay 
2020d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2021d71ae5a4SJacob Faibussowitsch {
20220e6d2581SBarry Smith   PetscReal xl, yl, xr, yr, w, h;
2023b0a32e0cSBarry Smith   PetscDraw draw;
2024ace3abfcSBarry Smith   PetscBool isnull;
20253270192aSSatish Balay 
202677ed5343SBarry Smith   PetscFunctionBegin;
20279566063dSJacob Faibussowitsch   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
20289566063dSJacob Faibussowitsch   PetscCall(PetscDrawIsNull(draw, &isnull));
20293ba16761SJacob Faibussowitsch   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
203077ed5343SBarry Smith 
20319371c9d4SSatish Balay   xr = A->cmap->n;
20329371c9d4SSatish Balay   yr = A->rmap->N;
20339371c9d4SSatish Balay   h  = yr / 10.0;
20349371c9d4SSatish Balay   w  = xr / 10.0;
20359371c9d4SSatish Balay   xr += w;
20369371c9d4SSatish Balay   yr += h;
20379371c9d4SSatish Balay   xl = -w;
20389371c9d4SSatish Balay   yl = -h;
20399566063dSJacob Faibussowitsch   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
20409566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
20419566063dSJacob Faibussowitsch   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
20429566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
20439566063dSJacob Faibussowitsch   PetscCall(PetscDrawSave(draw));
20443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20453270192aSSatish Balay }
20463270192aSSatish Balay 
2047d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2048d71ae5a4SJacob Faibussowitsch {
2049ace3abfcSBarry Smith   PetscBool iascii, isbinary, isdraw;
20502593348eSBarry Smith 
20513a40ed3dSBarry Smith   PetscFunctionBegin;
20529566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
20539566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
20549566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
205532077d6dSBarry Smith   if (iascii) {
20569566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
20570f5bd95cSBarry Smith   } else if (isbinary) {
20589566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
20590f5bd95cSBarry Smith   } else if (isdraw) {
20609566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
20615cd90555SBarry Smith   } else {
2062a5e6ed63SBarry Smith     Mat B;
20639566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
20649566063dSJacob Faibussowitsch     PetscCall(MatView(B, viewer));
20659566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
20662593348eSBarry Smith   }
20673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20682593348eSBarry Smith }
2069b6490206SBarry Smith 
2070d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2071d71ae5a4SJacob Faibussowitsch {
2072cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2073c1ac3661SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2074c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
2075d0f46423SBarry Smith   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
207697e567efSBarry Smith   MatScalar   *ap, *aa = a->a;
2077cd0e1443SSatish Balay 
20783a40ed3dSBarry Smith   PetscFunctionBegin;
20792d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over rows */
20809371c9d4SSatish Balay     row  = im[k];
20819371c9d4SSatish Balay     brow = row / bs;
20829371c9d4SSatish Balay     if (row < 0) {
20839371c9d4SSatish Balay       v += n;
20849371c9d4SSatish Balay       continue;
20859371c9d4SSatish Balay     } /* negative row */
208654c59aa7SJacob Faibussowitsch     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2087d29f2997SMatthew Woehlke     rp   = aj ? aj + ai[brow] : NULL;       /* mustn't add to NULL, that is UB */
2088d29f2997SMatthew Woehlke     ap   = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */
20892c3acbe9SBarry Smith     nrow = ailen[brow];
20902d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over columns */
20919371c9d4SSatish Balay       if (in[l] < 0) {
20929371c9d4SSatish Balay         v++;
20939371c9d4SSatish Balay         continue;
20949371c9d4SSatish Balay       } /* negative column */
209554c59aa7SJacob Faibussowitsch       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
20962d61bbb3SSatish Balay       col  = in[l];
20972d61bbb3SSatish Balay       bcol = col / bs;
20982d61bbb3SSatish Balay       cidx = col % bs;
20992d61bbb3SSatish Balay       ridx = row % bs;
21002d61bbb3SSatish Balay       high = nrow;
21012d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
21022d61bbb3SSatish Balay       while (high - low > 5) {
2103cd0e1443SSatish Balay         t = (low + high) / 2;
2104cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
2105cd0e1443SSatish Balay         else low = t;
2106cd0e1443SSatish Balay       }
2107cd0e1443SSatish Balay       for (i = low; i < high; i++) {
2108cd0e1443SSatish Balay         if (rp[i] > bcol) break;
2109cd0e1443SSatish Balay         if (rp[i] == bcol) {
21102d61bbb3SSatish Balay           *v++ = ap[bs2 * i + bs * cidx + ridx];
21112d61bbb3SSatish Balay           goto finished;
2112cd0e1443SSatish Balay         }
2113cd0e1443SSatish Balay       }
211497e567efSBarry Smith       *v++ = 0.0;
21152d61bbb3SSatish Balay     finished:;
2116cd0e1443SSatish Balay     }
2117cd0e1443SSatish Balay   }
21183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2119cd0e1443SSatish Balay }
2120cd0e1443SSatish Balay 
2121d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2122d71ae5a4SJacob Faibussowitsch {
212392c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2124e2ee6c50SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2125c1ac3661SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2126d0f46423SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2127ace3abfcSBarry Smith   PetscBool          roworiented = a->roworiented;
2128dd6ea824SBarry Smith   const PetscScalar *value       = v;
21299d243f67SHong Zhang   MatScalar         *ap = NULL, *aa = a->a, *bap;
213092c4ed94SBarry Smith 
21313a40ed3dSBarry Smith   PetscFunctionBegin;
21320e324ae4SSatish Balay   if (roworiented) {
21330e324ae4SSatish Balay     stepval = (n - 1) * bs;
21340e324ae4SSatish Balay   } else {
21350e324ae4SSatish Balay     stepval = (m - 1) * bs;
21360e324ae4SSatish Balay   }
213792c4ed94SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
213892c4ed94SBarry Smith     row = im[k];
21395ef9f2a5SBarry Smith     if (row < 0) continue;
21406bdcaf15SBarry Smith     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
214192c4ed94SBarry Smith     rp = aj + ai[row];
21427dc0baabSHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[row];
214392c4ed94SBarry Smith     rmax = imax[row];
214492c4ed94SBarry Smith     nrow = ailen[row];
214592c4ed94SBarry Smith     low  = 0;
2146c71e6ed7SBarry Smith     high = nrow;
214792c4ed94SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
21485ef9f2a5SBarry Smith       if (in[l] < 0) continue;
21496bdcaf15SBarry Smith       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
215092c4ed94SBarry Smith       col = in[l];
21517dc0baabSHong Zhang       if (!A->structure_only) {
215292c4ed94SBarry Smith         if (roworiented) {
215353ef36baSBarry Smith           value = v + (k * (stepval + bs) + l) * bs;
21540e324ae4SSatish Balay         } else {
215553ef36baSBarry Smith           value = v + (l * (stepval + bs) + k) * bs;
215692c4ed94SBarry Smith         }
21577dc0baabSHong Zhang       }
215826fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
215926fbe8dcSKarl Rupp       else high = nrow;
2160e2ee6c50SBarry Smith       lastcol = col;
216192c4ed94SBarry Smith       while (high - low > 7) {
216292c4ed94SBarry Smith         t = (low + high) / 2;
216392c4ed94SBarry Smith         if (rp[t] > col) high = t;
216492c4ed94SBarry Smith         else low = t;
216592c4ed94SBarry Smith       }
216692c4ed94SBarry Smith       for (i = low; i < high; i++) {
216792c4ed94SBarry Smith         if (rp[i] > col) break;
216892c4ed94SBarry Smith         if (rp[i] == col) {
21697dc0baabSHong Zhang           if (A->structure_only) goto noinsert2;
21708a84c255SSatish Balay           bap = ap + bs2 * i;
21710e324ae4SSatish Balay           if (roworiented) {
21728a84c255SSatish Balay             if (is == ADD_VALUES) {
2173dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2174ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2175dd9472c6SBarry Smith               }
21760e324ae4SSatish Balay             } else {
2177dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2178ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2179dd9472c6SBarry Smith               }
2180dd9472c6SBarry Smith             }
21810e324ae4SSatish Balay           } else {
21820e324ae4SSatish Balay             if (is == ADD_VALUES) {
218353ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2184ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
218553ef36baSBarry Smith                 bap += bs;
2186dd9472c6SBarry Smith               }
21870e324ae4SSatish Balay             } else {
218853ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2189ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
219053ef36baSBarry Smith                 bap += bs;
21918a84c255SSatish Balay               }
2192dd9472c6SBarry Smith             }
2193dd9472c6SBarry Smith           }
2194f1241b54SBarry Smith           goto noinsert2;
219592c4ed94SBarry Smith         }
219692c4ed94SBarry Smith       }
219789280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
21985f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
21997dc0baabSHong Zhang       if (A->structure_only) {
22007dc0baabSHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
22017dc0baabSHong Zhang       } else {
2202fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
22037dc0baabSHong Zhang       }
22049371c9d4SSatish Balay       N = nrow++ - 1;
22059371c9d4SSatish Balay       high++;
220692c4ed94SBarry Smith       /* shift up all the later entries in this row */
22079566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
220892c4ed94SBarry Smith       rp[i] = col;
22097dc0baabSHong Zhang       if (!A->structure_only) {
22109566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
22118a84c255SSatish Balay         bap = ap + bs2 * i;
22120e324ae4SSatish Balay         if (roworiented) {
2213dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2214ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2215dd9472c6SBarry Smith           }
22160e324ae4SSatish Balay         } else {
2217dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2218ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2219dd9472c6SBarry Smith           }
2220dd9472c6SBarry Smith         }
22217dc0baabSHong Zhang       }
2222f1241b54SBarry Smith     noinsert2:;
222392c4ed94SBarry Smith       low = i;
222492c4ed94SBarry Smith     }
222592c4ed94SBarry Smith     ailen[row] = nrow;
222692c4ed94SBarry Smith   }
22273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
222892c4ed94SBarry Smith }
222926e093fcSHong Zhang 
2230d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2231d71ae5a4SJacob Faibussowitsch {
2232584200bdSSatish Balay   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2233580bdb30SBarry Smith   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2234d0f46423SBarry Smith   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2235c1ac3661SBarry Smith   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
22363f1db9ecSBarry Smith   MatScalar   *aa    = a->a, *ap;
22373447b6efSHong Zhang   PetscReal    ratio = 0.6;
2238584200bdSSatish Balay 
22393a40ed3dSBarry Smith   PetscFunctionBegin;
22403ba16761SJacob Faibussowitsch   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
2241584200bdSSatish Balay 
224243ee02c3SBarry Smith   if (m) rmax = ailen[0];
2243584200bdSSatish Balay   for (i = 1; i < mbs; i++) {
2244584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
2245584200bdSSatish Balay     fshift += imax[i - 1] - ailen[i - 1];
2246d402145bSBarry Smith     rmax = PetscMax(rmax, ailen[i]);
2247584200bdSSatish Balay     if (fshift) {
2248580bdb30SBarry Smith       ip = aj + ai[i];
2249580bdb30SBarry Smith       ap = aa + bs2 * ai[i];
2250584200bdSSatish Balay       N  = ailen[i];
22519566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(ip - fshift, ip, N));
225248a46eb9SPierre Jolivet       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2253672ba085SHong Zhang     }
2254584200bdSSatish Balay     ai[i] = ai[i - 1] + ailen[i - 1];
2255584200bdSSatish Balay   }
2256584200bdSSatish Balay   if (mbs) {
2257584200bdSSatish Balay     fshift += imax[mbs - 1] - ailen[mbs - 1];
2258584200bdSSatish Balay     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2259584200bdSSatish Balay   }
22607c565772SBarry Smith 
2261584200bdSSatish Balay   /* reset ilen and imax for each row */
22627c565772SBarry Smith   a->nonzerorowcnt = 0;
2263672ba085SHong Zhang   if (A->structure_only) {
22649566063dSJacob Faibussowitsch     PetscCall(PetscFree2(a->imax, a->ilen));
2265672ba085SHong Zhang   } else { /* !A->structure_only */
2266584200bdSSatish Balay     for (i = 0; i < mbs; i++) {
2267584200bdSSatish Balay       ailen[i] = imax[i] = ai[i + 1] - ai[i];
22687c565772SBarry Smith       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2269584200bdSSatish Balay     }
2270672ba085SHong Zhang   }
2271a7c10996SSatish Balay   a->nz = ai[mbs];
2272584200bdSSatish Balay 
2273584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
2274b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
2275*ff6a9541SJacob Faibussowitsch   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
22765f80ce2aSJacob Faibussowitsch   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
22779566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
22789566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
22799566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
228026fbe8dcSKarl Rupp 
22818e58a170SBarry Smith   A->info.mallocs += a->reallocs;
2282e2f3b5e9SSatish Balay   a->reallocs         = 0;
22830e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2284647a6520SHong Zhang   a->rmax             = rmax;
2285cf4441caSHong Zhang 
228648a46eb9SPierre Jolivet   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
22873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2288584200bdSSatish Balay }
2289584200bdSSatish Balay 
2290bea157c4SSatish Balay /*
2291bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
2292bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2293a5b23f4aSJose E. Roman    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2294bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
2295bea157c4SSatish Balay */
2296d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2297d71ae5a4SJacob Faibussowitsch {
2298*ff6a9541SJacob Faibussowitsch   PetscInt j = 0;
22993a40ed3dSBarry Smith 
2300433994e6SBarry Smith   PetscFunctionBegin;
2301*ff6a9541SJacob Faibussowitsch   for (PetscInt i = 0; i < n; j++) {
2302*ff6a9541SJacob Faibussowitsch     PetscInt row = idx[i];
2303a5b23f4aSJose E. Roman     if (row % bs != 0) { /* Not the beginning of a block */
2304bea157c4SSatish Balay       sizes[j] = 1;
2305bea157c4SSatish Balay       i++;
2306e4fda26cSSatish Balay     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2307bea157c4SSatish Balay       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2308bea157c4SSatish Balay       i++;
23096aad120cSJose E. Roman     } else { /* Beginning of the block, so check if the complete block exists */
2310*ff6a9541SJacob Faibussowitsch       PetscBool flg = PETSC_TRUE;
2311*ff6a9541SJacob Faibussowitsch       for (PetscInt k = 1; k < bs; k++) {
2312bea157c4SSatish Balay         if (row + k != idx[i + k]) { /* break in the block */
2313bea157c4SSatish Balay           flg = PETSC_FALSE;
2314bea157c4SSatish Balay           break;
2315d9b7c43dSSatish Balay         }
2316bea157c4SSatish Balay       }
2317abc0a331SBarry Smith       if (flg) { /* No break in the bs */
2318bea157c4SSatish Balay         sizes[j] = bs;
2319bea157c4SSatish Balay         i += bs;
2320bea157c4SSatish Balay       } else {
2321bea157c4SSatish Balay         sizes[j] = 1;
2322bea157c4SSatish Balay         i++;
2323bea157c4SSatish Balay       }
2324bea157c4SSatish Balay     }
2325bea157c4SSatish Balay   }
2326bea157c4SSatish Balay   *bs_max = j;
23273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2328d9b7c43dSSatish Balay }
2329d9b7c43dSSatish Balay 
2330d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2331d71ae5a4SJacob Faibussowitsch {
2332d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2333f4df32b1SMatthew Knepley   PetscInt           i, j, k, count, *rows;
2334d0f46423SBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
233587828ca2SBarry Smith   PetscScalar        zero = 0.0;
23363f1db9ecSBarry Smith   MatScalar         *aa;
233797b48c8fSBarry Smith   const PetscScalar *xx;
233897b48c8fSBarry Smith   PetscScalar       *bb;
2339d9b7c43dSSatish Balay 
23403a40ed3dSBarry Smith   PetscFunctionBegin;
234197b48c8fSBarry Smith   /* fix right hand side if needed */
234297b48c8fSBarry Smith   if (x && b) {
23439566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
23449566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
2345ad540459SPierre Jolivet     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
23469566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
23479566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
234897b48c8fSBarry Smith   }
234997b48c8fSBarry Smith 
2350d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2351bea157c4SSatish Balay   /* allocate memory for rows,sizes */
23529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2353bea157c4SSatish Balay 
2354563b5814SBarry Smith   /* copy IS values to rows, and sort them */
235526fbe8dcSKarl Rupp   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
23569566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(is_n, rows));
235797b48c8fSBarry Smith 
2358a9817697SBarry Smith   if (baij->keepnonzeropattern) {
235926fbe8dcSKarl Rupp     for (i = 0; i < is_n; i++) sizes[i] = 1;
2360dffd3267SBarry Smith     bs_max = is_n;
2361dffd3267SBarry Smith   } else {
23629566063dSJacob Faibussowitsch     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2363e56f5c9eSBarry Smith     A->nonzerostate++;
2364dffd3267SBarry Smith   }
2365bea157c4SSatish Balay 
2366bea157c4SSatish Balay   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2367bea157c4SSatish Balay     row = rows[j];
23685f80ce2aSJacob Faibussowitsch     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2369bea157c4SSatish Balay     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2370b31fbe3bSSatish Balay     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2371a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2372d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2373bea157c4SSatish Balay         if (baij->ilen[row / bs] > 0) {
2374bea157c4SSatish Balay           baij->ilen[row / bs]       = 1;
2375bea157c4SSatish Balay           baij->j[baij->i[row / bs]] = row / bs;
237626fbe8dcSKarl Rupp 
23779566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(aa, count * bs));
2378a07cd24cSSatish Balay         }
2379563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
238048a46eb9SPierre Jolivet         for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES));
2381f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2382bea157c4SSatish Balay         baij->ilen[row / bs] = 0;
2383f4df32b1SMatthew Knepley       }      /* end (diag == 0.0) */
2384bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
23856bdcaf15SBarry Smith       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2386bea157c4SSatish Balay       for (k = 0; k < count; k++) {
2387d9b7c43dSSatish Balay         aa[0] = zero;
2388d9b7c43dSSatish Balay         aa += bs;
2389d9b7c43dSSatish Balay       }
239048a46eb9SPierre Jolivet       if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES));
2391d9b7c43dSSatish Balay     }
2392bea157c4SSatish Balay   }
2393bea157c4SSatish Balay 
23949566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, sizes));
23959566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
23963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2397d9b7c43dSSatish Balay }
23981c351548SSatish Balay 
2399*ff6a9541SJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2400d71ae5a4SJacob Faibussowitsch {
240197b48c8fSBarry Smith   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
240297b48c8fSBarry Smith   PetscInt           i, j, k, count;
240397b48c8fSBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
240497b48c8fSBarry Smith   PetscScalar        zero = 0.0;
240597b48c8fSBarry Smith   MatScalar         *aa;
240697b48c8fSBarry Smith   const PetscScalar *xx;
240797b48c8fSBarry Smith   PetscScalar       *bb;
240856777dd2SBarry Smith   PetscBool         *zeroed, vecs = PETSC_FALSE;
240997b48c8fSBarry Smith 
241097b48c8fSBarry Smith   PetscFunctionBegin;
241197b48c8fSBarry Smith   /* fix right hand side if needed */
241297b48c8fSBarry Smith   if (x && b) {
24139566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
24149566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
241556777dd2SBarry Smith     vecs = PETSC_TRUE;
241697b48c8fSBarry Smith   }
241797b48c8fSBarry Smith 
241897b48c8fSBarry Smith   /* zero the columns */
24199566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
242097b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
24215f80ce2aSJacob Faibussowitsch     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
242297b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
242397b48c8fSBarry Smith   }
242497b48c8fSBarry Smith   for (i = 0; i < A->rmap->N; i++) {
242597b48c8fSBarry Smith     if (!zeroed[i]) {
242697b48c8fSBarry Smith       row = i / bs;
242797b48c8fSBarry Smith       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
242897b48c8fSBarry Smith         for (k = 0; k < bs; k++) {
242997b48c8fSBarry Smith           col = bs * baij->j[j] + k;
243097b48c8fSBarry Smith           if (zeroed[col]) {
243197b48c8fSBarry Smith             aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
243256777dd2SBarry Smith             if (vecs) bb[i] -= aa[0] * xx[col];
243397b48c8fSBarry Smith             aa[0] = 0.0;
243497b48c8fSBarry Smith           }
243597b48c8fSBarry Smith         }
243697b48c8fSBarry Smith       }
243756777dd2SBarry Smith     } else if (vecs) bb[i] = diag * xx[i];
243897b48c8fSBarry Smith   }
24399566063dSJacob Faibussowitsch   PetscCall(PetscFree(zeroed));
244056777dd2SBarry Smith   if (vecs) {
24419566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
24429566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
244356777dd2SBarry Smith   }
244497b48c8fSBarry Smith 
244597b48c8fSBarry Smith   /* zero the rows */
244697b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
244797b48c8fSBarry Smith     row   = is_idx[i];
244897b48c8fSBarry Smith     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
244997b48c8fSBarry Smith     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
245097b48c8fSBarry Smith     for (k = 0; k < count; k++) {
245197b48c8fSBarry Smith       aa[0] = zero;
245297b48c8fSBarry Smith       aa += bs;
245397b48c8fSBarry Smith     }
2454dbbe0bcdSBarry Smith     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
245597b48c8fSBarry Smith   }
24569566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
24573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
245897b48c8fSBarry Smith }
245997b48c8fSBarry Smith 
/*
   MatSetValues_SeqBAIJ - Inserts or adds an m by n set of scalar values into a SeqBAIJ matrix.

   im[] and in[] are point (not block) row and column indices. Each is split into a block
   index (index / bs) and an offset inside the block (index % bs). Negative row or column
   indices are skipped. v[] is read as v[l + k * n] when a->roworiented is set and as
   v[k + l * m] otherwise, for row position k and column position l.

   If the target block already exists only the addressed entry is changed (added to for
   ADD_VALUES, overwritten otherwise). If it does not exist, a->nonew decides: 1 silently
   drops the value, -1 raises an error, anything else inserts a new zeroed block holding
   the value. When A->structure_only is set no values are read or written.
*/
2460d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2461d71ae5a4SJacob Faibussowitsch {
24622d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
  /* NOTE(review): ii is never referenced directly in this function; presumably it is used inside the MatSeqXAIJReallocateAIJ* macros - confirm before removing */
2463e2ee6c50SBarry Smith   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2464c1ac3661SBarry Smith   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2465d0f46423SBarry Smith   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2466c1ac3661SBarry Smith   PetscInt     ridx, cidx, bs2                 = a->bs2;
2467ace3abfcSBarry Smith   PetscBool    roworiented = a->roworiented;
2468d8cdefa3SHong Zhang   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
24692d61bbb3SSatish Balay 
24702d61bbb3SSatish Balay   PetscFunctionBegin;
24712d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over added rows */
2472085a36d4SBarry Smith     row  = im[k];
2473085a36d4SBarry Smith     brow = row / bs;
24745ef9f2a5SBarry Smith     if (row < 0) continue;
24756bdcaf15SBarry Smith     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
    /* rp / ap: block column indices and block values of block row brow */
24762d61bbb3SSatish Balay     rp = aj + ai[brow];
2477672ba085SHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[brow];
    /* nrow is the number of blocks currently in this block row (written back to ailen[brow] below); rmax is handed to the reallocation macros together with nrow */
24782d61bbb3SSatish Balay     rmax = imax[brow];
24792d61bbb3SSatish Balay     nrow = ailen[brow];
    /* [low, high) is the window of rp[] searched for the block column */
24802d61bbb3SSatish Balay     low  = 0;
2481c71e6ed7SBarry Smith     high = nrow;
24822d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over added columns */
24835ef9f2a5SBarry Smith       if (in[l] < 0) continue;
24846bdcaf15SBarry Smith       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
24859371c9d4SSatish Balay       col  = in[l];
24869371c9d4SSatish Balay       bcol = col / bs;
24879371c9d4SSatish Balay       ridx = row % bs;
24889371c9d4SSatish Balay       cidx = col % bs;
2489672ba085SHong Zhang       if (!A->structure_only) {
24902d61bbb3SSatish Balay         if (roworiented) {
24915ef9f2a5SBarry Smith           value = v[l + k * n];
24922d61bbb3SSatish Balay         } else {
24932d61bbb3SSatish Balay           value = v[k + l * m];
24942d61bbb3SSatish Balay         }
2495672ba085SHong Zhang       }
      /* low was left at the previous column's position: keep it when columns ascend, otherwise restart the search from the beginning of the row */
24969371c9d4SSatish Balay       if (col <= lastcol) low = 0;
24979371c9d4SSatish Balay       else high = nrow;
2498e2ee6c50SBarry Smith       lastcol = col;
      /* bisect until the window holds at most 7 entries, then scan it linearly */
24992d61bbb3SSatish Balay       while (high - low > 7) {
25002d61bbb3SSatish Balay         t = (low + high) / 2;
25012d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
25022d61bbb3SSatish Balay         else low = t;
25032d61bbb3SSatish Balay       }
25042d61bbb3SSatish Balay       for (i = low; i < high; i++) {
25052d61bbb3SSatish Balay         if (rp[i] > bcol) break;
25062d61bbb3SSatish Balay         if (rp[i] == bcol) {
          /* block already present: entry (ridx, cidx) sits at offset bs * cidx + ridx inside block i */
25072d61bbb3SSatish Balay           bap = ap + bs2 * i + bs * cidx + ridx;
2508672ba085SHong Zhang           if (!A->structure_only) {
25092d61bbb3SSatish Balay             if (is == ADD_VALUES) *bap += value;
25102d61bbb3SSatish Balay             else *bap = value;
2511672ba085SHong Zhang           }
25122d61bbb3SSatish Balay           goto noinsert1;
25132d61bbb3SSatish Balay         }
25142d61bbb3SSatish Balay       }
      /* block not found; i is the position at which it would have to be inserted */
25152d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
25165f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      /* NOTE(review): these macros receive the local pointers by name (ai, aj, rp, ap, ...); presumably they grow the storage and rebind those locals when the row is full - confirm against the macro definitions */
2517672ba085SHong Zhang       if (A->structure_only) {
2518672ba085SHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2519672ba085SHong Zhang       } else {
2520fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2521672ba085SHong Zhang       }
      /* N is the index of the last block before the insertion; the row grows by one block */
25229371c9d4SSatish Balay       N = nrow++ - 1;
25239371c9d4SSatish Balay       high++;
25242d61bbb3SSatish Balay       /* shift up all the later entries in this row */
25259566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
25262d61bbb3SSatish Balay       rp[i] = bcol;
2527580bdb30SBarry Smith       if (!A->structure_only) {
        /* make room for the new block, zero it, then store the single value */
25289566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
25299566063dSJacob Faibussowitsch         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2530580bdb30SBarry Smith         ap[bs2 * i + bs * cidx + ridx] = value;
2531580bdb30SBarry Smith       }
      /* a new block changes the nonzero structure */
2532085a36d4SBarry Smith       a->nz++;
2533e56f5c9eSBarry Smith       A->nonzerostate++;
25342d61bbb3SSatish Balay     noinsert1:;
      /* the next column's search starts where this one ended */
25352d61bbb3SSatish Balay       low = i;
25362d61bbb3SSatish Balay     }
25372d61bbb3SSatish Balay     ailen[brow] = nrow;
25382d61bbb3SSatish Balay   }
25393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25402d61bbb3SSatish Balay }
25412d61bbb3SSatish Balay 
2542*ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2543d71ae5a4SJacob Faibussowitsch {
25442d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
25452d61bbb3SSatish Balay   Mat          outA;
2546ace3abfcSBarry Smith   PetscBool    row_identity, col_identity;
25472d61bbb3SSatish Balay 
25482d61bbb3SSatish Balay   PetscFunctionBegin;
25495f80ce2aSJacob Faibussowitsch   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
25509566063dSJacob Faibussowitsch   PetscCall(ISIdentity(row, &row_identity));
25519566063dSJacob Faibussowitsch   PetscCall(ISIdentity(col, &col_identity));
25525f80ce2aSJacob Faibussowitsch   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
25532d61bbb3SSatish Balay 
25542d61bbb3SSatish Balay   outA            = inA;
2555d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
25569566063dSJacob Faibussowitsch   PetscCall(PetscFree(inA->solvertype));
25579566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
25582d61bbb3SSatish Balay 
25599566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2560cf242676SKris Buschelman 
25619566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)row));
25629566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
2563c3122656SLisandro Dalcin   a->row = row;
25649566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)col));
25659566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
2566c3122656SLisandro Dalcin   a->col = col;
2567c38d4ed2SBarry Smith 
2568c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
25699566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
25709566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2571c38d4ed2SBarry Smith 
25729566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
25734dfa11a4SJacob Faibussowitsch   if (!a->solve_work) { PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); }
25749566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(outA, inA, info));
25753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25762d61bbb3SSatish Balay }
2577d9b7c43dSSatish Balay 
2578*ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2579d71ae5a4SJacob Faibussowitsch {
258027a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
258127a8da17SBarry Smith 
258227a8da17SBarry Smith   PetscFunctionBegin;
2583*ff6a9541SJacob Faibussowitsch   baij->nz = baij->maxnz;
2584*ff6a9541SJacob Faibussowitsch   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2585*ff6a9541SJacob Faibussowitsch   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
25863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
258727a8da17SBarry Smith }
258827a8da17SBarry Smith 
258927a8da17SBarry Smith /*@
259011a5261eSBarry Smith     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix.
259127a8da17SBarry Smith 
259227a8da17SBarry Smith   Input Parameters:
259311a5261eSBarry Smith +  mat - the `MATSEQBAIJ` matrix
259427a8da17SBarry Smith -  indices - the column indices
259527a8da17SBarry Smith 
259615091d37SBarry Smith   Level: advanced
259715091d37SBarry Smith 
259827a8da17SBarry Smith   Notes:
259927a8da17SBarry Smith     This can be called if you have precomputed the nonzero structure of the
260027a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
260111a5261eSBarry Smith   of the `MatSetValues()` operation.
260227a8da17SBarry Smith 
260327a8da17SBarry Smith     You MUST have set the correct numbers of nonzeros per row in the call to
260411a5261eSBarry Smith   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
260527a8da17SBarry Smith 
260611a5261eSBarry Smith     MUST be called before any calls to `MatSetValues()`
260727a8da17SBarry Smith 
260811a5261eSBarry Smith .seealso: `MATSEQBAIJ`, `MatSetValues()`
260927a8da17SBarry Smith @*/
2610d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2611d71ae5a4SJacob Faibussowitsch {
261227a8da17SBarry Smith   PetscFunctionBegin;
26130700a824SBarry Smith   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2614dadcf809SJacob Faibussowitsch   PetscValidIntPointer(indices, 2);
2615cac4c232SBarry Smith   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices));
26163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
261727a8da17SBarry Smith }
261827a8da17SBarry Smith 
2619d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2620d71ae5a4SJacob Faibussowitsch {
2621273d9f13SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2622c1ac3661SBarry Smith   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2623273d9f13SBarry Smith   PetscReal    atmp;
262487828ca2SBarry Smith   PetscScalar *x, zero = 0.0;
2625273d9f13SBarry Smith   MatScalar   *aa;
2626c1ac3661SBarry Smith   PetscInt     ncols, brow, krow, kcol;
2627273d9f13SBarry Smith 
2628273d9f13SBarry Smith   PetscFunctionBegin;
26295f80ce2aSJacob Faibussowitsch   /* why is this not a macro???????????????????????????????????????????????????????????????? */
26305f80ce2aSJacob Faibussowitsch   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2631d0f46423SBarry Smith   bs  = A->rmap->bs;
2632273d9f13SBarry Smith   aa  = a->a;
2633273d9f13SBarry Smith   ai  = a->i;
2634273d9f13SBarry Smith   aj  = a->j;
2635273d9f13SBarry Smith   mbs = a->mbs;
2636273d9f13SBarry Smith 
26379566063dSJacob Faibussowitsch   PetscCall(VecSet(v, zero));
26389566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
26399566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(v, &n));
26405f80ce2aSJacob Faibussowitsch   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2641273d9f13SBarry Smith   for (i = 0; i < mbs; i++) {
26429371c9d4SSatish Balay     ncols = ai[1] - ai[0];
26439371c9d4SSatish Balay     ai++;
2644273d9f13SBarry Smith     brow = bs * i;
2645273d9f13SBarry Smith     for (j = 0; j < ncols; j++) {
2646273d9f13SBarry Smith       for (kcol = 0; kcol < bs; kcol++) {
2647273d9f13SBarry Smith         for (krow = 0; krow < bs; krow++) {
26489371c9d4SSatish Balay           atmp = PetscAbsScalar(*aa);
26499371c9d4SSatish Balay           aa++;
2650273d9f13SBarry Smith           row = brow + krow; /* row index */
26519371c9d4SSatish Balay           if (PetscAbsScalar(x[row]) < atmp) {
26529371c9d4SSatish Balay             x[row] = atmp;
26539371c9d4SSatish Balay             if (idx) idx[row] = bs * (*aj) + kcol;
26549371c9d4SSatish Balay           }
2655273d9f13SBarry Smith         }
2656273d9f13SBarry Smith       }
2657273d9f13SBarry Smith       aj++;
2658273d9f13SBarry Smith     }
2659273d9f13SBarry Smith   }
26609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
26613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2662273d9f13SBarry Smith }
2663273d9f13SBarry Smith 
2664d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2665d71ae5a4SJacob Faibussowitsch {
26663c896bc6SHong Zhang   PetscFunctionBegin;
26673c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
26683c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
26693c896bc6SHong Zhang     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
26703c896bc6SHong Zhang     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2671d88c0aacSHong Zhang     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
26723c896bc6SHong Zhang 
26735f80ce2aSJacob Faibussowitsch     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
26745f80ce2aSJacob Faibussowitsch     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
26759566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
26769566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)B));
26773c896bc6SHong Zhang   } else {
26789566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
26793c896bc6SHong Zhang   }
26803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
26813c896bc6SHong Zhang }
26823c896bc6SHong Zhang 
2683d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2684d71ae5a4SJacob Faibussowitsch {
2685273d9f13SBarry Smith   PetscFunctionBegin;
26869566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL));
26873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2688273d9f13SBarry Smith }
2689273d9f13SBarry Smith 
2690d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2691d71ae5a4SJacob Faibussowitsch {
2692f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
26936e111a19SKarl Rupp 
2694f2a5309cSSatish Balay   PetscFunctionBegin;
2695f2a5309cSSatish Balay   *array = a->a;
26963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2697f2a5309cSSatish Balay }
2698f2a5309cSSatish Balay 
2699d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2700d71ae5a4SJacob Faibussowitsch {
2701f2a5309cSSatish Balay   PetscFunctionBegin;
2702cda14afcSprj-   *array = NULL;
27033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2704f2a5309cSSatish Balay }
2705f2a5309cSSatish Balay 
2706d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2707d71ae5a4SJacob Faibussowitsch {
2708b264fe52SHong Zhang   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
270952768537SHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
271052768537SHong Zhang   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
271152768537SHong Zhang 
271252768537SHong Zhang   PetscFunctionBegin;
271352768537SHong Zhang   /* Set the number of nonzeros in the new matrix */
27149566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
27153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
271652768537SHong Zhang }
271752768537SHong Zhang 
2718d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2719d71ae5a4SJacob Faibussowitsch {
272042ee4b1aSHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
272131ce2d13SHong Zhang   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2722e838b9e7SJed Brown   PetscBLASInt one = 1;
272342ee4b1aSHong Zhang 
272442ee4b1aSHong Zhang   PetscFunctionBegin;
2725134adf20SPierre Jolivet   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2726134adf20SPierre Jolivet     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2727134adf20SPierre Jolivet     if (e) {
27289566063dSJacob Faibussowitsch       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2729134adf20SPierre Jolivet       if (e) {
27309566063dSJacob Faibussowitsch         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2731134adf20SPierre Jolivet         if (e) str = SAME_NONZERO_PATTERN;
2732134adf20SPierre Jolivet       }
2733134adf20SPierre Jolivet     }
273454c59aa7SJacob Faibussowitsch     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2735134adf20SPierre Jolivet   }
273642ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2737f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2738c5df96a5SBarry Smith     PetscBLASInt bnz;
27399566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2740792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
27419566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2742ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
27439566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
274442ee4b1aSHong Zhang   } else {
274552768537SHong Zhang     Mat       B;
274652768537SHong Zhang     PetscInt *nnz;
274754c59aa7SJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
27489566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
27499566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
27509566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
27519566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
27529566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
27539566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
27549566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
27559566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
27569566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
27579566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
27589566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
275942ee4b1aSHong Zhang   }
27603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
276142ee4b1aSHong Zhang }
276242ee4b1aSHong Zhang 
2763d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2764d71ae5a4SJacob Faibussowitsch {
2765*ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
27662726fb6dSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27672726fb6dSPierre Jolivet   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
27682726fb6dSPierre Jolivet   MatScalar   *aa = a->a;
27692726fb6dSPierre Jolivet 
27702726fb6dSPierre Jolivet   PetscFunctionBegin;
27712726fb6dSPierre Jolivet   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
27723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2773*ff6a9541SJacob Faibussowitsch #else
2774*ff6a9541SJacob Faibussowitsch   (void)A;
2775*ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2776*ff6a9541SJacob Faibussowitsch #endif
27772726fb6dSPierre Jolivet }
27782726fb6dSPierre Jolivet 
2779*ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2780d71ae5a4SJacob Faibussowitsch {
2781*ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
278299cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
278399cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2784dd6ea824SBarry Smith   MatScalar   *aa = a->a;
278599cafbc1SBarry Smith 
278699cafbc1SBarry Smith   PetscFunctionBegin;
278799cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
27883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2789*ff6a9541SJacob Faibussowitsch #else
2790*ff6a9541SJacob Faibussowitsch   (void)A;
2791*ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2792*ff6a9541SJacob Faibussowitsch #endif
279399cafbc1SBarry Smith }
279499cafbc1SBarry Smith 
2795*ff6a9541SJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2796d71ae5a4SJacob Faibussowitsch {
2797*ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
279899cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
279999cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2800dd6ea824SBarry Smith   MatScalar   *aa = a->a;
280199cafbc1SBarry Smith 
280299cafbc1SBarry Smith   PetscFunctionBegin;
280399cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
28043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2805*ff6a9541SJacob Faibussowitsch #else
2806*ff6a9541SJacob Faibussowitsch   (void)A;
2807*ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2808*ff6a9541SJacob Faibussowitsch #endif
280999cafbc1SBarry Smith }
281099cafbc1SBarry Smith 
28113acb8795SBarry Smith /*
28122479783cSJose E. Roman     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
28133acb8795SBarry Smith */
2814*ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2815d71ae5a4SJacob Faibussowitsch {
28163acb8795SBarry Smith   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
28173acb8795SBarry Smith   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
28183acb8795SBarry Smith   PetscInt     nz = a->i[m], row, *jj, mr, col;
28193acb8795SBarry Smith 
28203acb8795SBarry Smith   PetscFunctionBegin;
28213acb8795SBarry Smith   *nn = n;
28223ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
28235f80ce2aSJacob Faibussowitsch   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
28249566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28259566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28269566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28273acb8795SBarry Smith   jj = a->j;
2828ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
28293acb8795SBarry Smith   cia[0] = oshift;
2830ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28319566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
28323acb8795SBarry Smith   jj = a->j;
28333acb8795SBarry Smith   for (row = 0; row < m; row++) {
28343acb8795SBarry Smith     mr = a->i[row + 1] - a->i[row];
28353acb8795SBarry Smith     for (i = 0; i < mr; i++) {
28363acb8795SBarry Smith       col = *jj++;
283726fbe8dcSKarl Rupp 
28383acb8795SBarry Smith       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
28393acb8795SBarry Smith     }
28403acb8795SBarry Smith   }
28419566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
28429371c9d4SSatish Balay   *ia = cia;
28439371c9d4SSatish Balay   *ja = cja;
28443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28453acb8795SBarry Smith }
28463acb8795SBarry Smith 
2847*ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2848d71ae5a4SJacob Faibussowitsch {
28493acb8795SBarry Smith   PetscFunctionBegin;
28503ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
28519566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ia));
28529566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ja));
28533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28543acb8795SBarry Smith }
28553acb8795SBarry Smith 
2856525d23c0SHong Zhang /*
2857525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2858525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2859040ebd07SHong Zhang  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2860525d23c0SHong Zhang  */
2861d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2862d71ae5a4SJacob Faibussowitsch {
2863525d23c0SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2864c0349474SHong Zhang   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2865525d23c0SHong Zhang   PetscInt     nz = a->i[m], row, *jj, mr, col;
2866525d23c0SHong Zhang   PetscInt    *cspidx;
2867f6d58c54SBarry Smith 
2868f6d58c54SBarry Smith   PetscFunctionBegin;
2869525d23c0SHong Zhang   *nn = n;
28703ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2871f6d58c54SBarry Smith 
28729566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28739566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28759566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cspidx));
2876525d23c0SHong Zhang   jj = a->j;
2877ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2878525d23c0SHong Zhang   cia[0] = oshift;
2879ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28809566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
2881525d23c0SHong Zhang   jj = a->j;
2882525d23c0SHong Zhang   for (row = 0; row < m; row++) {
2883525d23c0SHong Zhang     mr = a->i[row + 1] - a->i[row];
2884525d23c0SHong Zhang     for (i = 0; i < mr; i++) {
2885525d23c0SHong Zhang       col                                         = *jj++;
2886525d23c0SHong Zhang       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2887525d23c0SHong Zhang       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2888525d23c0SHong Zhang     }
2889525d23c0SHong Zhang   }
28909566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
2891071fcb05SBarry Smith   *ia    = cia;
2892071fcb05SBarry Smith   *ja    = cja;
2893525d23c0SHong Zhang   *spidx = cspidx;
28943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2895f6d58c54SBarry Smith }
2896f6d58c54SBarry Smith 
2897d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2898d71ae5a4SJacob Faibussowitsch {
2899525d23c0SHong Zhang   PetscFunctionBegin;
29009566063dSJacob Faibussowitsch   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
29019566063dSJacob Faibussowitsch   PetscCall(PetscFree(*spidx));
29023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2903f6d58c54SBarry Smith }
290499cafbc1SBarry Smith 
2905d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2906d71ae5a4SJacob Faibussowitsch {
29077d68702bSBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
29087d68702bSBarry Smith 
29097d68702bSBarry Smith   PetscFunctionBegin;
291048a46eb9SPierre Jolivet   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
29119566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
29123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29137d68702bSBarry Smith }
29147d68702bSBarry Smith 
29152593348eSBarry Smith /* -------------------------------------------------------------------*/
/*
   MatOps_Values - Function table for the SeqBAIJ matrix type.

   The initializer is purely positional: each entry fills the next slot in order, and a NULL
   entry means this file provides no implementation for that slot. The numeric comments
   (4, 10, 15, ...) give the zero-based index of the entry they precede and are only reading aids.

   NOTE(review): the slot order presumably has to match the member order of struct _MatOps,
   which is declared elsewhere - confirm before inserting, removing or reordering entries.

   The generic MatMult_SeqBAIJ_N / MatMultAdd_SeqBAIJ_N entries listed here can be replaced on a
   matrix's ops by block-size-specific kernels in MatSeqBAIJSetPreallocation_SeqBAIJ().
*/
2916dec0b466SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2917cc2dc46cSBarry Smith                                        MatGetRow_SeqBAIJ,
2918cc2dc46cSBarry Smith                                        MatRestoreRow_SeqBAIJ,
2919cc2dc46cSBarry Smith                                        MatMult_SeqBAIJ_N,
292097304618SKris Buschelman                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
29217c922b88SBarry Smith                                        MatMultTranspose_SeqBAIJ,
29227c922b88SBarry Smith                                        MatMultTransposeAdd_SeqBAIJ,
2923f4259b30SLisandro Dalcin                                        NULL,
2924f4259b30SLisandro Dalcin                                        NULL,
2925f4259b30SLisandro Dalcin                                        NULL,
2926f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
2927cc2dc46cSBarry Smith                                        MatLUFactor_SeqBAIJ,
2928f4259b30SLisandro Dalcin                                        NULL,
2929f4259b30SLisandro Dalcin                                        NULL,
2930f2501298SSatish Balay                                        MatTranspose_SeqBAIJ,
293197304618SKris Buschelman                                        /* 15*/ MatGetInfo_SeqBAIJ,
2932cc2dc46cSBarry Smith                                        MatEqual_SeqBAIJ,
2933cc2dc46cSBarry Smith                                        MatGetDiagonal_SeqBAIJ,
2934cc2dc46cSBarry Smith                                        MatDiagonalScale_SeqBAIJ,
2935cc2dc46cSBarry Smith                                        MatNorm_SeqBAIJ,
2936f4259b30SLisandro Dalcin                                        /* 20*/ NULL,
2937cc2dc46cSBarry Smith                                        MatAssemblyEnd_SeqBAIJ,
2938cc2dc46cSBarry Smith                                        MatSetOption_SeqBAIJ,
2939cc2dc46cSBarry Smith                                        MatZeroEntries_SeqBAIJ,
2940d519adbfSMatthew Knepley                                        /* 24*/ MatZeroRows_SeqBAIJ,
2941f4259b30SLisandro Dalcin                                        NULL,
2942f4259b30SLisandro Dalcin                                        NULL,
2943f4259b30SLisandro Dalcin                                        NULL,
2944f4259b30SLisandro Dalcin                                        NULL,
29454994cf47SJed Brown                                        /* 29*/ MatSetUp_SeqBAIJ,
2946f4259b30SLisandro Dalcin                                        NULL,
2947f4259b30SLisandro Dalcin                                        NULL,
2948f4259b30SLisandro Dalcin                                        NULL,
2949f4259b30SLisandro Dalcin                                        NULL,
2950d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_SeqBAIJ,
2951f4259b30SLisandro Dalcin                                        NULL,
2952f4259b30SLisandro Dalcin                                        NULL,
2953cc2dc46cSBarry Smith                                        MatILUFactor_SeqBAIJ,
2954f4259b30SLisandro Dalcin                                        NULL,
2955d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_SeqBAIJ,
29567dae84e0SHong Zhang                                        MatCreateSubMatrices_SeqBAIJ,
2957cc2dc46cSBarry Smith                                        MatIncreaseOverlap_SeqBAIJ,
2958cc2dc46cSBarry Smith                                        MatGetValues_SeqBAIJ,
29593c896bc6SHong Zhang                                        MatCopy_SeqBAIJ,
2960f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
2961cc2dc46cSBarry Smith                                        MatScale_SeqBAIJ,
29627d68702bSBarry Smith                                        MatShift_SeqBAIJ,
2963f4259b30SLisandro Dalcin                                        NULL,
296497b48c8fSBarry Smith                                        MatZeroRowsColumns_SeqBAIJ,
2965f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
29663b2fbd54SBarry Smith                                        MatGetRowIJ_SeqBAIJ,
296792c4ed94SBarry Smith                                        MatRestoreRowIJ_SeqBAIJ,
29683acb8795SBarry Smith                                        MatGetColumnIJ_SeqBAIJ,
29693acb8795SBarry Smith                                        MatRestoreColumnIJ_SeqBAIJ,
297093dfae19SHong Zhang                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
2971f4259b30SLisandro Dalcin                                        NULL,
2972f4259b30SLisandro Dalcin                                        NULL,
2973090001bdSToby Isaac                                        NULL,
2974d3825aa8SBarry Smith                                        MatSetValuesBlocked_SeqBAIJ,
29757dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2976b9b97703SBarry Smith                                        MatDestroy_SeqBAIJ,
2977b9b97703SBarry Smith                                        MatView_SeqBAIJ,
2978f4259b30SLisandro Dalcin                                        NULL,
2979f4259b30SLisandro Dalcin                                        NULL,
2980f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
2981f4259b30SLisandro Dalcin                                        NULL,
2982f4259b30SLisandro Dalcin                                        NULL,
2983f4259b30SLisandro Dalcin                                        NULL,
2984f4259b30SLisandro Dalcin                                        NULL,
2985d519adbfSMatthew Knepley                                        /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2986f4259b30SLisandro Dalcin                                        NULL,
2987c87e5d42SMatthew Knepley                                        MatConvert_Basic,
2988f4259b30SLisandro Dalcin                                        NULL,
2989f4259b30SLisandro Dalcin                                        NULL,
2990f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
2991f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
2992f4259b30SLisandro Dalcin                                        NULL,
2993f4259b30SLisandro Dalcin                                        NULL,
2994f4259b30SLisandro Dalcin                                        NULL,
2995f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
2996f4259b30SLisandro Dalcin                                        NULL,
2997f4259b30SLisandro Dalcin                                        NULL,
2998f4259b30SLisandro Dalcin                                        NULL,
29995bba2384SShri Abhyankar                                        MatLoad_SeqBAIJ,
3000f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
3001f4259b30SLisandro Dalcin                                        NULL,
3002f4259b30SLisandro Dalcin                                        NULL,
3003f4259b30SLisandro Dalcin                                        NULL,
3004f4259b30SLisandro Dalcin                                        NULL,
3005f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
3006f4259b30SLisandro Dalcin                                        NULL,
3007f4259b30SLisandro Dalcin                                        NULL,
3008f4259b30SLisandro Dalcin                                        NULL,
3009f4259b30SLisandro Dalcin                                        NULL,
3010f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
3011f4259b30SLisandro Dalcin                                        NULL,
3012f4259b30SLisandro Dalcin                                        NULL,
3013f4259b30SLisandro Dalcin                                        NULL,
3014f4259b30SLisandro Dalcin                                        NULL,
3015f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
3016f4259b30SLisandro Dalcin                                        NULL,
3017f4259b30SLisandro Dalcin                                        NULL,
30182726fb6dSPierre Jolivet                                        MatConjugate_SeqBAIJ,
3019f4259b30SLisandro Dalcin                                        NULL,
3020f4259b30SLisandro Dalcin                                        /*104*/ NULL,
302199cafbc1SBarry Smith                                        MatRealPart_SeqBAIJ,
30222af78befSBarry Smith                                        MatImaginaryPart_SeqBAIJ,
3023f4259b30SLisandro Dalcin                                        NULL,
3024f4259b30SLisandro Dalcin                                        NULL,
3025f4259b30SLisandro Dalcin                                        /*109*/ NULL,
3026f4259b30SLisandro Dalcin                                        NULL,
3027f4259b30SLisandro Dalcin                                        NULL,
3028f4259b30SLisandro Dalcin                                        NULL,
3029547795f9SHong Zhang                                        MatMissingDiagonal_SeqBAIJ,
3030f4259b30SLisandro Dalcin                                        /*114*/ NULL,
3031f4259b30SLisandro Dalcin                                        NULL,
3032f4259b30SLisandro Dalcin                                        NULL,
3033f4259b30SLisandro Dalcin                                        NULL,
3034f4259b30SLisandro Dalcin                                        NULL,
3035f4259b30SLisandro Dalcin                                        /*119*/ NULL,
3036f4259b30SLisandro Dalcin                                        NULL,
3037547795f9SHong Zhang                                        MatMultHermitianTranspose_SeqBAIJ,
3038d6037b41SHong Zhang                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3039f4259b30SLisandro Dalcin                                        NULL,
3040f4259b30SLisandro Dalcin                                        /*124*/ NULL,
3041857cbf51SRichard Tran Mills                                        MatGetColumnReductions_SeqBAIJ,
30423964eb88SJed Brown                                        MatInvertBlockDiagonal_SeqBAIJ,
3043f4259b30SLisandro Dalcin                                        NULL,
3044f4259b30SLisandro Dalcin                                        NULL,
3045f4259b30SLisandro Dalcin                                        /*129*/ NULL,
3046f4259b30SLisandro Dalcin                                        NULL,
3047f4259b30SLisandro Dalcin                                        NULL,
3048f4259b30SLisandro Dalcin                                        NULL,
3049f4259b30SLisandro Dalcin                                        NULL,
3050f4259b30SLisandro Dalcin                                        /*134*/ NULL,
3051f4259b30SLisandro Dalcin                                        NULL,
3052f4259b30SLisandro Dalcin                                        NULL,
3053f4259b30SLisandro Dalcin                                        NULL,
3054f4259b30SLisandro Dalcin                                        NULL,
305546533700Sstefano_zampini                                        /*139*/ MatSetBlockSizes_Default,
3056f4259b30SLisandro Dalcin                                        NULL,
3057f4259b30SLisandro Dalcin                                        NULL,
3058bdf6f3fcSHong Zhang                                        MatFDColoringSetUp_SeqXAIJ,
3059f4259b30SLisandro Dalcin                                        NULL,
306086e85357SHong Zhang                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3061d70f29a3SPierre Jolivet                                        MatDestroySubMatrices_SeqBAIJ,
3062d70f29a3SPierre Jolivet                                        NULL,
306399a7f59eSMark Adams                                        NULL,
306499a7f59eSMark Adams                                        NULL,
30657fb60732SBarry Smith                                        NULL,
30667fb60732SBarry Smith                                        /*150*/ NULL,
3067dec0b466SHong Zhang                                        NULL};
30682593348eSBarry Smith 
3069*ff6a9541SJacob Faibussowitsch static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3070d71ae5a4SJacob Faibussowitsch {
30713e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
30728ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
30733e90b805SBarry Smith 
30743e90b805SBarry Smith   PetscFunctionBegin;
30755f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
30763e90b805SBarry Smith 
30773e90b805SBarry Smith   /* allocate space for values if not already there */
3078*ff6a9541SJacob Faibussowitsch   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
30793e90b805SBarry Smith 
30803e90b805SBarry Smith   /* copy values over */
30819566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
30823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
30833e90b805SBarry Smith }
30843e90b805SBarry Smith 
3085*ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3086d71ae5a4SJacob Faibussowitsch {
30873e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
30888ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
30893e90b805SBarry Smith 
30903e90b805SBarry Smith   PetscFunctionBegin;
30915f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
30925f80ce2aSJacob Faibussowitsch   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
30933e90b805SBarry Smith 
30943e90b805SBarry Smith   /* copy values over */
30959566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
30963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
30973e90b805SBarry Smith }
30983e90b805SBarry Smith 
3099cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3100cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3101273d9f13SBarry Smith 
3102d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz)
3103d71ae5a4SJacob Faibussowitsch {
3104a23d5eceSKris Buschelman   Mat_SeqBAIJ *b;
3105535b19f3SBarry Smith   PetscInt     i, mbs, nbs, bs2;
31068afaa268SBarry Smith   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3107a23d5eceSKris Buschelman 
3108a23d5eceSKris Buschelman   PetscFunctionBegin;
31092576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3110ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
3111ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
3112ab93d7beSBarry Smith     nz             = 0;
3113ab93d7beSBarry Smith   }
31148c07d4e3SBarry Smith 
31159566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
31169566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
31179566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
31189566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3119899cda47SBarry Smith 
3120899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
3121899cda47SBarry Smith 
3122d0f46423SBarry Smith   mbs = B->rmap->n / bs;
3123d0f46423SBarry Smith   nbs = B->cmap->n / bs;
3124a23d5eceSKris Buschelman   bs2 = bs * bs;
3125a23d5eceSKris Buschelman 
31265f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3127a23d5eceSKris Buschelman 
3128a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
31295f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3130a23d5eceSKris Buschelman   if (nnz) {
3131a23d5eceSKris Buschelman     for (i = 0; i < mbs; i++) {
31325f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
31335f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3134a23d5eceSKris Buschelman     }
3135a23d5eceSKris Buschelman   }
3136a23d5eceSKris Buschelman 
3137a23d5eceSKris Buschelman   b = (Mat_SeqBAIJ *)B->data;
3138d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
31399566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3140d0609cedSBarry Smith   PetscOptionsEnd();
31418c07d4e3SBarry Smith 
3142a23d5eceSKris Buschelman   if (!flg) {
3143a23d5eceSKris Buschelman     switch (bs) {
3144a23d5eceSKris Buschelman     case 1:
3145a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
3146a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3147a23d5eceSKris Buschelman       break;
3148a23d5eceSKris Buschelman     case 2:
3149a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
3150a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3151a23d5eceSKris Buschelman       break;
3152a23d5eceSKris Buschelman     case 3:
3153a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
3154a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3155a23d5eceSKris Buschelman       break;
3156a23d5eceSKris Buschelman     case 4:
3157a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
3158a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3159a23d5eceSKris Buschelman       break;
3160a23d5eceSKris Buschelman     case 5:
3161a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
3162a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3163a23d5eceSKris Buschelman       break;
3164a23d5eceSKris Buschelman     case 6:
3165a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
3166a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3167a23d5eceSKris Buschelman       break;
3168a23d5eceSKris Buschelman     case 7:
3169a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
3170a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3171a23d5eceSKris Buschelman       break;
31729371c9d4SSatish Balay     case 9: {
31736679dcc1SBarry Smith       PetscInt version = 1;
31749566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
31756679dcc1SBarry Smith       switch (version) {
31765f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
31776679dcc1SBarry Smith       case 1:
317896e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
317996e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
31809566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
31816679dcc1SBarry Smith         break;
31826679dcc1SBarry Smith #endif
31836679dcc1SBarry Smith       default:
318496e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_N;
318596e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
31869566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
318796e086a2SDaniel Kokron         break;
31886679dcc1SBarry Smith       }
31896679dcc1SBarry Smith       break;
31906679dcc1SBarry Smith     }
3191ebada01fSBarry Smith     case 11:
3192ebada01fSBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_11;
3193ebada01fSBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3194ebada01fSBarry Smith       break;
31959371c9d4SSatish Balay     case 12: {
31966679dcc1SBarry Smith       PetscInt version = 1;
31979566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
31986679dcc1SBarry Smith       switch (version) {
31996679dcc1SBarry Smith       case 1:
32006679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
32016679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
32029566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32038ab949d8SShri Abhyankar         break;
32046679dcc1SBarry Smith       case 2:
32056679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
32066679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
32079566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32086679dcc1SBarry Smith         break;
32096679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
32106679dcc1SBarry Smith       case 3:
32116679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
32126679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
32139566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32146679dcc1SBarry Smith         break;
32156679dcc1SBarry Smith #endif
3216a23d5eceSKris Buschelman       default:
3217a23d5eceSKris Buschelman         B->ops->mult    = MatMult_SeqBAIJ_N;
3218a23d5eceSKris Buschelman         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
32199566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32206679dcc1SBarry Smith         break;
32216679dcc1SBarry Smith       }
32226679dcc1SBarry Smith       break;
32236679dcc1SBarry Smith     }
32249371c9d4SSatish Balay     case 15: {
32256679dcc1SBarry Smith       PetscInt version = 1;
32269566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32276679dcc1SBarry Smith       switch (version) {
32286679dcc1SBarry Smith       case 1:
32296679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
32309566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32316679dcc1SBarry Smith         break;
32326679dcc1SBarry Smith       case 2:
32336679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
32349566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32356679dcc1SBarry Smith         break;
32366679dcc1SBarry Smith       case 3:
32376679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
32389566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32396679dcc1SBarry Smith         break;
32406679dcc1SBarry Smith       case 4:
32416679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
32429566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32436679dcc1SBarry Smith         break;
32446679dcc1SBarry Smith       default:
32456679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_N;
32469566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32476679dcc1SBarry Smith         break;
32486679dcc1SBarry Smith       }
32496679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
32506679dcc1SBarry Smith       break;
32516679dcc1SBarry Smith     }
32526679dcc1SBarry Smith     default:
32536679dcc1SBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_N;
32546679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
32559566063dSJacob Faibussowitsch       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3256a23d5eceSKris Buschelman       break;
3257a23d5eceSKris Buschelman     }
3258a23d5eceSKris Buschelman   }
3259e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
3260a23d5eceSKris Buschelman   b->mbs      = mbs;
3261a23d5eceSKris Buschelman   b->nbs      = nbs;
3262ab93d7beSBarry Smith   if (!skipallocation) {
32632ee49352SLisandro Dalcin     if (!b->imax) {
32649566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
326526fbe8dcSKarl Rupp 
32664fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
32672ee49352SLisandro Dalcin     }
3268ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
326926fbe8dcSKarl Rupp     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3270a23d5eceSKris Buschelman     if (!nnz) {
3271a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3272c62bd62aSJed Brown       else if (nz < 0) nz = 1;
32735d2a9ed1SStefano Zampini       nz = PetscMin(nz, nbs);
3274a23d5eceSKris Buschelman       for (i = 0; i < mbs; i++) b->imax[i] = nz;
32759566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, mbs, &nz));
3276a23d5eceSKris Buschelman     } else {
3277c73702f5SBarry Smith       PetscInt64 nz64 = 0;
32789371c9d4SSatish Balay       for (i = 0; i < mbs; i++) {
32799371c9d4SSatish Balay         b->imax[i] = nnz[i];
32809371c9d4SSatish Balay         nz64 += nnz[i];
32819371c9d4SSatish Balay       }
32829566063dSJacob Faibussowitsch       PetscCall(PetscIntCast(nz64, &nz));
3283a23d5eceSKris Buschelman     }
3284a23d5eceSKris Buschelman 
3285a23d5eceSKris Buschelman     /* allocate the matrix space */
32869566063dSJacob Faibussowitsch     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3287672ba085SHong Zhang     if (B->structure_only) {
32889566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz, &b->j));
32899566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i));
3290672ba085SHong Zhang     } else {
32916679dcc1SBarry Smith       PetscInt nzbs2 = 0;
32929566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
32939566063dSJacob Faibussowitsch       PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i));
32949566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(b->a, nz * bs2));
3295672ba085SHong Zhang     }
32969566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(b->j, nz));
329726fbe8dcSKarl Rupp 
3298672ba085SHong Zhang     if (B->structure_only) {
3299672ba085SHong Zhang       b->singlemalloc = PETSC_FALSE;
3300672ba085SHong Zhang       b->free_a       = PETSC_FALSE;
3301672ba085SHong Zhang     } else {
3302a23d5eceSKris Buschelman       b->singlemalloc = PETSC_TRUE;
3303672ba085SHong Zhang       b->free_a       = PETSC_TRUE;
3304672ba085SHong Zhang     }
3305672ba085SHong Zhang     b->free_ij = PETSC_TRUE;
3306672ba085SHong Zhang 
3307a23d5eceSKris Buschelman     b->i[0] = 0;
3308ad540459SPierre Jolivet     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3309672ba085SHong Zhang 
3310e811da20SHong Zhang   } else {
3311e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3312e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3313ab93d7beSBarry Smith   }
3314a23d5eceSKris Buschelman 
3315a23d5eceSKris Buschelman   b->bs2              = bs2;
3316a23d5eceSKris Buschelman   b->mbs              = mbs;
3317a23d5eceSKris Buschelman   b->nz               = 0;
3318b32cb4a7SJed Brown   b->maxnz            = nz;
3319b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3320cb7b82ddSBarry Smith   B->was_assembled    = PETSC_FALSE;
3321cb7b82ddSBarry Smith   B->assembled        = PETSC_FALSE;
33229566063dSJacob Faibussowitsch   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
33233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3324a23d5eceSKris Buschelman }
3325a23d5eceSKris Buschelman 
3326d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3327d71ae5a4SJacob Faibussowitsch {
3328725b52f3SLisandro Dalcin   PetscInt     i, m, nz, nz_max = 0, *nnz;
3329f4259b30SLisandro Dalcin   PetscScalar *values      = NULL;
3330d47bf9aaSJed Brown   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3331725b52f3SLisandro Dalcin 
3332725b52f3SLisandro Dalcin   PetscFunctionBegin;
33335f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
33349566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
33359566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
33369566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
33379566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
33389566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3339d0f46423SBarry Smith   m = B->rmap->n / bs;
3340725b52f3SLisandro Dalcin 
33415f80ce2aSJacob Faibussowitsch   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
33429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &nnz));
3343725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3344cf12db73SBarry Smith     nz = ii[i + 1] - ii[i];
33455f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3346725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3347725b52f3SLisandro Dalcin     nnz[i] = nz;
3348725b52f3SLisandro Dalcin   }
33499566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
33509566063dSJacob Faibussowitsch   PetscCall(PetscFree(nnz));
3351725b52f3SLisandro Dalcin 
3352725b52f3SLisandro Dalcin   values = (PetscScalar *)V;
335348a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3354725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3355cf12db73SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
3356cf12db73SBarry Smith     const PetscInt *icols = jj + ii[i];
3357bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {
3358cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
33599566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
33603adadaf3SJed Brown     } else {
33613adadaf3SJed Brown       PetscInt j;
33623adadaf3SJed Brown       for (j = 0; j < ncols; j++) {
33633adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
33649566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
33653adadaf3SJed Brown       }
33663adadaf3SJed Brown     }
3367725b52f3SLisandro Dalcin   }
33689566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
33699566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
33709566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
33719566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
33723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3373725b52f3SLisandro Dalcin }
3374725b52f3SLisandro Dalcin 
3375cda14afcSprj- /*@C
337611a5261eSBarry Smith    MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3377cda14afcSprj- 
3378cda14afcSprj-    Not Collective
3379cda14afcSprj- 
3380cda14afcSprj-    Input Parameter:
338111a5261eSBarry Smith .  A - a `MATSEQBAIJ` matrix
3382cda14afcSprj- 
3383cda14afcSprj-    Output Parameter:
3384cda14afcSprj- .   array - pointer to the data
3385cda14afcSprj- 
3386cda14afcSprj-    Level: intermediate
3387cda14afcSprj- 
338811a5261eSBarry Smith .seealso: `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3389cda14afcSprj- @*/
3390d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array)
3391d71ae5a4SJacob Faibussowitsch {
3392cda14afcSprj-   PetscFunctionBegin;
  /* dispatch to the routine composed on A as "MatSeqBAIJGetArray_C"; MatCreate_SeqBAIJ() composes MatSeqBAIJGetArray_SeqBAIJ() under that name */
3393cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
33943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3395cda14afcSprj- }
3396cda14afcSprj- 
3397cda14afcSprj- /*@C
339811a5261eSBarry Smith    MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3399cda14afcSprj- 
3400cda14afcSprj-    Not Collective
3401cda14afcSprj- 
3402cda14afcSprj-    Input Parameters:
340311a5261eSBarry Smith +  A - a `MATSEQBAIJ` matrix
3404cda14afcSprj- -  array - pointer to the data
3405cda14afcSprj- 
3406cda14afcSprj-    Level: intermediate
3407cda14afcSprj- 
3408db781477SPatrick Sanan .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3409cda14afcSprj- @*/
3410d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array)
3411d71ae5a4SJacob Faibussowitsch {
3412cda14afcSprj-   PetscFunctionBegin;
  /* dispatch to the routine composed on A as "MatSeqBAIJRestoreArray_C"; MatCreate_SeqBAIJ() composes MatSeqBAIJRestoreArray_SeqBAIJ() under that name */
3413cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
34143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3415cda14afcSprj- }
3416cda14afcSprj- 
34170bad9183SKris Buschelman /*MC
3418fafad747SKris Buschelman    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
34190bad9183SKris Buschelman    block sparse compressed row format.
34200bad9183SKris Buschelman 
34210bad9183SKris Buschelman    Options Database Keys:
34226679dcc1SBarry Smith + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
34236679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
34240bad9183SKris Buschelman 
34250bad9183SKris Buschelman    Level: beginner
34260cd7f59aSBarry Smith 
34270cd7f59aSBarry Smith    Notes:
342811a5261eSBarry Smith     `MatSetOption`(A, `MAT_STRUCTURE_ONLY`, `PETSC_TRUE`) may be called for this matrix type. In this case no
342911a5261eSBarry Smith     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
34300bad9183SKris Buschelman 
34316679dcc1SBarry Smith    Run with -info to see what version of the matrix-vector product is being used
34326679dcc1SBarry Smith 
3433db781477SPatrick Sanan .seealso: `MatCreateSeqBAIJ()`
34340bad9183SKris Buschelman M*/
34350bad9183SKris Buschelman 
3436cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3437b24902e0SBarry Smith 
3438d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3439d71ae5a4SJacob Faibussowitsch {
3440c1ac3661SBarry Smith   PetscMPIInt  size;
3441b6490206SBarry Smith   Mat_SeqBAIJ *b;
34423b2fbd54SBarry Smith 
34433a40ed3dSBarry Smith   PetscFunctionBegin;
34449566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
34455f80ce2aSJacob Faibussowitsch   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3446b6490206SBarry Smith 
34474dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
3448b0a32e0cSBarry Smith   B->data = (void *)b;
34499566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
345026fbe8dcSKarl Rupp 
3451f4259b30SLisandro Dalcin   b->row          = NULL;
3452f4259b30SLisandro Dalcin   b->col          = NULL;
3453f4259b30SLisandro Dalcin   b->icol         = NULL;
34542593348eSBarry Smith   b->reallocs     = 0;
3455f4259b30SLisandro Dalcin   b->saved_values = NULL;
34562593348eSBarry Smith 
3457c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
34582593348eSBarry Smith   b->nonew              = 0;
3459f4259b30SLisandro Dalcin   b->diag               = NULL;
3460f4259b30SLisandro Dalcin   B->spptr              = NULL;
3461b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3462a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
34634e220ebcSLois Curfman McInnes 
34649566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
34659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
34669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
34679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
34689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
34699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
34709566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
34719566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
34729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
34739566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
34747ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
34759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
34767ea3e4caSstefano_zampini #endif
34779566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
34789566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
34793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
34802593348eSBarry Smith }
34812593348eSBarry Smith 
3482d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3483d71ae5a4SJacob Faibussowitsch {
3484b24902e0SBarry Smith   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3485a96a251dSBarry Smith   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3486de6a44a3SBarry Smith 
34873a40ed3dSBarry Smith   PetscFunctionBegin;
34885f80ce2aSJacob Faibussowitsch   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
34892593348eSBarry Smith 
34904fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
34914fd072dbSBarry Smith     c->imax           = a->imax;
34924fd072dbSBarry Smith     c->ilen           = a->ilen;
34934fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
34944fd072dbSBarry Smith   } else {
34959566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3496b6490206SBarry Smith     for (i = 0; i < mbs; i++) {
34972593348eSBarry Smith       c->imax[i] = a->imax[i];
34982593348eSBarry Smith       c->ilen[i] = a->ilen[i];
34992593348eSBarry Smith     }
35004fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
35014fd072dbSBarry Smith   }
35022593348eSBarry Smith 
35032593348eSBarry Smith   /* allocate the matrix space */
350416a2bf60SHong Zhang   if (mallocmatspace) {
35054fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35069566063dSJacob Faibussowitsch       PetscCall(PetscCalloc1(bs2 * nz, &c->a));
350726fbe8dcSKarl Rupp 
35084fd072dbSBarry Smith       c->i            = a->i;
35094fd072dbSBarry Smith       c->j            = a->j;
3510379be0ddSLisandro Dalcin       c->singlemalloc = PETSC_FALSE;
3511379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
3512379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
35134fd072dbSBarry Smith       c->parent       = A;
35141e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
35151e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
351626fbe8dcSKarl Rupp 
35179566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)A));
35189566063dSJacob Faibussowitsch       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35199566063dSJacob Faibussowitsch       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35204fd072dbSBarry Smith     } else {
35219566063dSJacob Faibussowitsch       PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));
352226fbe8dcSKarl Rupp 
3523c4992f7dSBarry Smith       c->singlemalloc = PETSC_TRUE;
3524379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
35254fd072dbSBarry Smith       c->free_ij      = PETSC_TRUE;
352626fbe8dcSKarl Rupp 
35279566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3528b6490206SBarry Smith       if (mbs > 0) {
35299566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(c->j, a->j, nz));
35302e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
35319566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
35322e8a6d31SBarry Smith         } else {
35339566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(c->a, bs2 * nz));
35342593348eSBarry Smith         }
35352593348eSBarry Smith       }
35361e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
35371e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
353816a2bf60SHong Zhang     }
35394fd072dbSBarry Smith   }
354016a2bf60SHong Zhang 
35412593348eSBarry Smith   c->roworiented = a->roworiented;
35422593348eSBarry Smith   c->nonew       = a->nonew;
354326fbe8dcSKarl Rupp 
35449566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
35459566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
354626fbe8dcSKarl Rupp 
35475c9eb25fSBarry Smith   c->bs2 = a->bs2;
35485c9eb25fSBarry Smith   c->mbs = a->mbs;
35495c9eb25fSBarry Smith   c->nbs = a->nbs;
35502593348eSBarry Smith 
35512593348eSBarry Smith   if (a->diag) {
35524fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35534fd072dbSBarry Smith       c->diag      = a->diag;
35544fd072dbSBarry Smith       c->free_diag = PETSC_FALSE;
35554fd072dbSBarry Smith     } else {
35569566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
355726fbe8dcSKarl Rupp       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
35584fd072dbSBarry Smith       c->free_diag = PETSC_TRUE;
35594fd072dbSBarry Smith     }
3560f4259b30SLisandro Dalcin   } else c->diag = NULL;
356126fbe8dcSKarl Rupp 
35622593348eSBarry Smith   c->nz         = a->nz;
3563f2cbd3d5SJed Brown   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3564f361c04dSBarry Smith   c->solve_work = NULL;
3565f361c04dSBarry Smith   c->mult_work  = NULL;
3566f361c04dSBarry Smith   c->sor_workt  = NULL;
3567f361c04dSBarry Smith   c->sor_work   = NULL;
356888e51ccdSHong Zhang 
356988e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
357088e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3571cd6b891eSBarry Smith   if (a->compressedrow.use) {
357288e51ccdSHong Zhang     i = a->compressedrow.nrows;
35739566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
35749566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
35759566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
357688e51ccdSHong Zhang   } else {
357788e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
35780298fd71SBarry Smith     c->compressedrow.i      = NULL;
35790298fd71SBarry Smith     c->compressedrow.rindex = NULL;
358088e51ccdSHong Zhang   }
3581e56f5c9eSBarry Smith   C->nonzerostate = A->nonzerostate;
358226fbe8dcSKarl Rupp 
35839566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
35843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
35852593348eSBarry Smith }
35862593348eSBarry Smith 
3587d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3588d71ae5a4SJacob Faibussowitsch {
3589b24902e0SBarry Smith   PetscFunctionBegin;
35909566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
35919566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
35929566063dSJacob Faibussowitsch   PetscCall(MatSetType(*B, MATSEQBAIJ));
35939566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
35943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3595b24902e0SBarry Smith }
3596b24902e0SBarry Smith 
3597618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3598d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3599d71ae5a4SJacob Faibussowitsch {
3600b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3601b51a4376SLisandro Dalcin   PetscInt    *rowidxs, *colidxs;
3602b51a4376SLisandro Dalcin   PetscScalar *matvals;
3603b51a4376SLisandro Dalcin 
3604b51a4376SLisandro Dalcin   PetscFunctionBegin;
36059566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
3606b51a4376SLisandro Dalcin 
3607b51a4376SLisandro Dalcin   /* read matrix header */
36089566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
36095f80ce2aSJacob Faibussowitsch   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
36109371c9d4SSatish Balay   M  = header[1];
36119371c9d4SSatish Balay   N  = header[2];
36129371c9d4SSatish Balay   nz = header[3];
36135f80ce2aSJacob Faibussowitsch   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
36145f80ce2aSJacob Faibussowitsch   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
36155f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3616b51a4376SLisandro Dalcin 
3617b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
36189566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3619b51a4376SLisandro Dalcin   /* set local and global sizes if not set already */
3620b51a4376SLisandro Dalcin   if (mat->rmap->n < 0) mat->rmap->n = M;
3621b51a4376SLisandro Dalcin   if (mat->cmap->n < 0) mat->cmap->n = N;
3622b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3623b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
36249566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
36259566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
3626b51a4376SLisandro Dalcin 
3627b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
36289566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &rows, &cols));
36295f80ce2aSJacob Faibussowitsch   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
36309566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
36319566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
36329371c9d4SSatish Balay   mbs = m / bs;
36339371c9d4SSatish Balay   nbs = n / bs;
3634b51a4376SLisandro Dalcin 
3635b51a4376SLisandro Dalcin   /* read in row lengths, column indices and nonzero values */
36369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &rowidxs));
36379566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
36389371c9d4SSatish Balay   rowidxs[0] = 0;
36399371c9d4SSatish Balay   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3640b51a4376SLisandro Dalcin   sum = rowidxs[m];
36415f80ce2aSJacob Faibussowitsch   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3642b51a4376SLisandro Dalcin 
3643b51a4376SLisandro Dalcin   /* read in column indices and nonzero values */
36449566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
36459566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
36469566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3647b51a4376SLisandro Dalcin 
3648b51a4376SLisandro Dalcin   {               /* preallocate matrix storage */
3649b51a4376SLisandro Dalcin     PetscBT   bt; /* helper bit set to count nonzeros */
3650b51a4376SLisandro Dalcin     PetscInt *nnz;
3651618cc2edSLisandro Dalcin     PetscBool sbaij;
3652b51a4376SLisandro Dalcin 
36539566063dSJacob Faibussowitsch     PetscCall(PetscBTCreate(nbs, &bt));
36549566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(mbs, &nnz));
36559566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3656b51a4376SLisandro Dalcin     for (i = 0; i < mbs; i++) {
36579566063dSJacob Faibussowitsch       PetscCall(PetscBTMemzero(nbs, bt));
3658618cc2edSLisandro Dalcin       for (k = 0; k < bs; k++) {
3659618cc2edSLisandro Dalcin         PetscInt row = bs * i + k;
3660618cc2edSLisandro Dalcin         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3661618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3662618cc2edSLisandro Dalcin           if (!sbaij || col >= row)
3663618cc2edSLisandro Dalcin             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3664618cc2edSLisandro Dalcin         }
3665618cc2edSLisandro Dalcin       }
3666b51a4376SLisandro Dalcin     }
36679566063dSJacob Faibussowitsch     PetscCall(PetscBTDestroy(&bt));
36689566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
36699566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
36709566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
3671b51a4376SLisandro Dalcin   }
3672b51a4376SLisandro Dalcin 
3673b51a4376SLisandro Dalcin   /* store matrix values */
3674b51a4376SLisandro Dalcin   for (i = 0; i < m; i++) {
3675b51a4376SLisandro Dalcin     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
36769566063dSJacob Faibussowitsch     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3677b51a4376SLisandro Dalcin   }
3678b51a4376SLisandro Dalcin 
36799566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
36809566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs, matvals));
36819566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
36829566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
36833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3684b51a4376SLisandro Dalcin }
3685b51a4376SLisandro Dalcin 
3686d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3687d71ae5a4SJacob Faibussowitsch {
36887f489da9SVaclav Hapla   PetscBool isbinary;
3689f501eaabSShri Abhyankar 
3690f501eaabSShri Abhyankar   PetscFunctionBegin;
36919566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
36925f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
36939566063dSJacob Faibussowitsch   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
36943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3695f501eaabSShri Abhyankar }
3696f501eaabSShri Abhyankar 
3697273d9f13SBarry Smith /*@C
369811a5261eSBarry Smith    MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3699273d9f13SBarry Smith    compressed row) format.  For good matrix assembly performance the
3700273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3701273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3702273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
37032593348eSBarry Smith 
3704d083f849SBarry Smith    Collective
3705273d9f13SBarry Smith 
3706273d9f13SBarry Smith    Input Parameters:
370711a5261eSBarry Smith +  comm - MPI communicator, set to `PETSC_COMM_SELF`
370811a5261eSBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
370911a5261eSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3710273d9f13SBarry Smith .  m - number of rows
3711273d9f13SBarry Smith .  n - number of columns
371235d8aa7fSBarry Smith .  nz - number of nonzero blocks  per block row (same for all rows)
371335d8aa7fSBarry Smith -  nnz - array containing the number of nonzero blocks in the various block rows
37140298fd71SBarry Smith          (possibly different for each block row) or NULL
3715273d9f13SBarry Smith 
3716273d9f13SBarry Smith    Output Parameter:
3717273d9f13SBarry Smith .  A - the matrix
3718273d9f13SBarry Smith 
371911a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3720f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
372111a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3722175b88e8SBarry Smith 
3723273d9f13SBarry Smith    Options Database Keys:
372411a5261eSBarry Smith +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3725a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
3726273d9f13SBarry Smith 
3727273d9f13SBarry Smith    Level: intermediate
3728273d9f13SBarry Smith 
3729273d9f13SBarry Smith    Notes:
3730d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
3731d1be2dadSMatthew Knepley 
373249a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
373349a6f317SBarry Smith 
373435d8aa7fSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
373535d8aa7fSBarry Smith 
373611a5261eSBarry Smith    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
3737273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3738273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3739273d9f13SBarry Smith 
3740273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
374111a5261eSBarry Smith    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
3742651615e1SBarry Smith    allocation.  See [Sparse Matrices](sec_matsparse) for details.
3743273d9f13SBarry Smith    matrices.
3744273d9f13SBarry Smith 
3745651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3746273d9f13SBarry Smith @*/
3747d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3748d71ae5a4SJacob Faibussowitsch {
3749273d9f13SBarry Smith   PetscFunctionBegin;
37509566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
37519566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
37529566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJ));
37539566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
37543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3755273d9f13SBarry Smith }
3756273d9f13SBarry Smith 
3757273d9f13SBarry Smith /*@C
3758273d9f13SBarry Smith    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3759273d9f13SBarry Smith    per row in the matrix. For good matrix assembly performance the
3760273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3761273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3762273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
3763273d9f13SBarry Smith 
3764d083f849SBarry Smith    Collective
3765273d9f13SBarry Smith 
3766273d9f13SBarry Smith    Input Parameters:
37671c4f3114SJed Brown +  B - the matrix
376811a5261eSBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
376911a5261eSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3770273d9f13SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
3771273d9f13SBarry Smith -  nnz - array containing the number of block nonzeros in the various block rows
37720298fd71SBarry Smith          (possibly different for each block row) or NULL
3773273d9f13SBarry Smith 
3774273d9f13SBarry Smith    Options Database Keys:
377511a5261eSBarry Smith +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3776a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
3777273d9f13SBarry Smith 
3778273d9f13SBarry Smith    Level: intermediate
3779273d9f13SBarry Smith 
3780273d9f13SBarry Smith    Notes:
378149a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
378249a6f317SBarry Smith 
378311a5261eSBarry Smith    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3784aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3785aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3786aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3787aa95bbe8SBarry Smith 
378811a5261eSBarry Smith    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
3789273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3790273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3791273d9f13SBarry Smith 
3792273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
379311a5261eSBarry Smith    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
3794651615e1SBarry Smith    allocation.  See [Sparse Matrices](sec_matsparse) for details.
3795273d9f13SBarry Smith 
3796651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3797273d9f13SBarry Smith @*/
3798d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3799d71ae5a4SJacob Faibussowitsch {
3800273d9f13SBarry Smith   PetscFunctionBegin;
38016ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
38026ba663aaSJed Brown   PetscValidType(B, 1);
38036ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3804cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
38053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3806273d9f13SBarry Smith }
3807a1d92eedSBarry Smith 
3808725b52f3SLisandro Dalcin /*@C
380911a5261eSBarry Smith    MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3810725b52f3SLisandro Dalcin 
3811d083f849SBarry Smith    Collective
3812725b52f3SLisandro Dalcin 
3813725b52f3SLisandro Dalcin    Input Parameters:
38141c4f3114SJed Brown +  B - the matrix
3815725b52f3SLisandro Dalcin .  i - the indices into j for the start of each local row (starts with zero)
3816725b52f3SLisandro Dalcin .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3817725b52f3SLisandro Dalcin -  v - optional values in the matrix
3818725b52f3SLisandro Dalcin 
3819664954b6SBarry Smith    Level: advanced
3820725b52f3SLisandro Dalcin 
38213adadaf3SJed Brown    Notes:
382211a5261eSBarry Smith    The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
382311a5261eSBarry Smith    may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
38243adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
382511a5261eSBarry Smith    `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
38263adadaf3SJed Brown    block column and the second index is over columns within a block.
38273adadaf3SJed Brown 
3828664954b6SBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3829664954b6SBarry Smith 
3830db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3831725b52f3SLisandro Dalcin @*/
3832d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3833d71ae5a4SJacob Faibussowitsch {
3834725b52f3SLisandro Dalcin   PetscFunctionBegin;
38356ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
38366ba663aaSJed Brown   PetscValidType(B, 1);
38376ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3838cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
38393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3840725b52f3SLisandro Dalcin }
3841725b52f3SLisandro Dalcin 
3842c75a6043SHong Zhang /*@
384311a5261eSBarry Smith      MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3844c75a6043SHong Zhang 
3845d083f849SBarry Smith      Collective
3846c75a6043SHong Zhang 
3847c75a6043SHong Zhang    Input Parameters:
3848c75a6043SHong Zhang +  comm - must be an MPI communicator of size 1
3849c75a6043SHong Zhang .  bs - size of block
3850c75a6043SHong Zhang .  m - number of rows
3851c75a6043SHong Zhang .  n - number of columns
3852483a2f95SBarry Smith .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3853c75a6043SHong Zhang .  j - column indices
3854c75a6043SHong Zhang -  a - matrix values
3855c75a6043SHong Zhang 
3856c75a6043SHong Zhang    Output Parameter:
3857c75a6043SHong Zhang .  mat - the matrix
3858c75a6043SHong Zhang 
3859dfb205c3SBarry Smith    Level: advanced
3860c75a6043SHong Zhang 
3861c75a6043SHong Zhang    Notes:
3862c75a6043SHong Zhang        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3863c75a6043SHong Zhang     once the matrix is destroyed
3864c75a6043SHong Zhang 
3865c75a6043SHong Zhang        You cannot set new nonzero locations into this matrix, that will generate an error.
3866c75a6043SHong Zhang 
3867c75a6043SHong Zhang        The i and j indices are 0 based
3868c75a6043SHong Zhang 
386911a5261eSBarry Smith        When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3870dfb205c3SBarry Smith 
38713adadaf3SJed Brown       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
38723adadaf3SJed Brown       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
38733adadaf3SJed Brown       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
38743adadaf3SJed Brown       with column-major ordering within blocks.
3875dfb205c3SBarry Smith 
3876db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3877c75a6043SHong Zhang @*/
3878d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3879d71ae5a4SJacob Faibussowitsch {
3880c75a6043SHong Zhang   Mat_SeqBAIJ *baij;
3881c75a6043SHong Zhang 
3882c75a6043SHong Zhang   PetscFunctionBegin;
38835f80ce2aSJacob Faibussowitsch   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
38845f80ce2aSJacob Faibussowitsch   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3885c75a6043SHong Zhang 
38869566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
38879566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, m, n));
38889566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATSEQBAIJ));
38899566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3890c75a6043SHong Zhang   baij = (Mat_SeqBAIJ *)(*mat)->data;
38919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3892c75a6043SHong Zhang 
3893c75a6043SHong Zhang   baij->i = i;
3894c75a6043SHong Zhang   baij->j = j;
3895c75a6043SHong Zhang   baij->a = a;
389626fbe8dcSKarl Rupp 
3897c75a6043SHong Zhang   baij->singlemalloc   = PETSC_FALSE;
3898c75a6043SHong Zhang   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3899e6b907acSBarry Smith   baij->free_a         = PETSC_FALSE;
3900e6b907acSBarry Smith   baij->free_ij        = PETSC_FALSE;
3901ceb5bf51SJacob Faibussowitsch   baij->free_imax_ilen = PETSC_TRUE;
3902c75a6043SHong Zhang 
3903ceb5bf51SJacob Faibussowitsch   for (PetscInt ii = 0; ii < m; ii++) {
3904ceb5bf51SJacob Faibussowitsch     const PetscInt row_len = i[ii + 1] - i[ii];
3905ceb5bf51SJacob Faibussowitsch 
3906ceb5bf51SJacob Faibussowitsch     baij->ilen[ii] = baij->imax[ii] = row_len;
3907ceb5bf51SJacob Faibussowitsch     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3908c75a6043SHong Zhang   }
390976bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
3910ceb5bf51SJacob Faibussowitsch     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
39116bdcaf15SBarry Smith       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
39126bdcaf15SBarry Smith       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3913c75a6043SHong Zhang     }
391476bd3646SJed Brown   }
3915c75a6043SHong Zhang 
39169566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
39179566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
39183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3919c75a6043SHong Zhang }
3920bdf6f3fcSHong Zhang 
3921d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3922d71ae5a4SJacob Faibussowitsch {
3923bdf6f3fcSHong Zhang   PetscFunctionBegin;
39249566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
39253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3926bdf6f3fcSHong Zhang }
3927