xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 421480d92be24cdb9933c60510b8e175c0a8d034)
12593348eSBarry Smith /*
2b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
32593348eSBarry Smith   matrix storage format.
42593348eSBarry Smith */
5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
6c6db04a5SJed Brown #include <petscblaslapack.h>
7af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
943516a2dSKris Buschelman 
1026cec326SBarry Smith /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
1126cec326SBarry Smith #define TYPE BAIJ
1226cec326SBarry Smith #define TYPE_BS
1326cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
1426cec326SBarry Smith #undef TYPE_BS
1526cec326SBarry Smith #define TYPE_BS _BS
1626cec326SBarry Smith #define TYPE_BS_ON
1726cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
1826cec326SBarry Smith #undef TYPE_BS
1926cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmat.h"
2026cec326SBarry Smith #undef TYPE
2126cec326SBarry Smith #undef TYPE_BS_ON
2226cec326SBarry Smith 
237ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
247ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
257ea3e4caSstefano_zampini #endif
267ea3e4caSstefano_zampini 
27b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29b5b72c8aSIrina Sokolova #endif
30c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
31b5b72c8aSIrina Sokolova 
32*421480d9SBarry Smith MatGetDiagonalMarkers(SeqBAIJ, A->rmap->bs)
33*421480d9SBarry Smith 
34ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
35d71ae5a4SJacob Faibussowitsch {
369463ebdaSPierre Jolivet   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
37ff6a9541SJacob Faibussowitsch   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
389463ebdaSPierre Jolivet   MatScalar   *a_val = a_aij->a;
399463ebdaSPierre Jolivet 
409463ebdaSPierre Jolivet   PetscFunctionBegin;
419566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
42ff6a9541SJacob Faibussowitsch   PetscCall(PetscArrayzero(reductions, n));
439463ebdaSPierre Jolivet   if (type == NORM_2) {
44ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
459463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
469463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
47857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
489463ebdaSPierre Jolivet           a_val++;
499463ebdaSPierre Jolivet         }
509463ebdaSPierre Jolivet       }
519463ebdaSPierre Jolivet     }
529463ebdaSPierre Jolivet   } else if (type == NORM_1) {
53ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
549463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
559463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
56857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
579463ebdaSPierre Jolivet           a_val++;
589463ebdaSPierre Jolivet         }
599463ebdaSPierre Jolivet       }
609463ebdaSPierre Jolivet     }
619463ebdaSPierre Jolivet   } else if (type == NORM_INFINITY) {
62ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
639463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
649463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
656497c311SBarry Smith           PetscInt col    = A->cmap->rstart + a_aij->j[i] * bs + jb;
66857cbf51SRichard Tran Mills           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
679463ebdaSPierre Jolivet           a_val++;
689463ebdaSPierre Jolivet         }
699463ebdaSPierre Jolivet       }
709463ebdaSPierre Jolivet     }
71857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
72ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
73857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
74857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
75857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
76857cbf51SRichard Tran Mills           a_val++;
77857cbf51SRichard Tran Mills         }
78857cbf51SRichard Tran Mills       }
79857cbf51SRichard Tran Mills     }
80857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
81ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
82857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
83857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
84857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
85857cbf51SRichard Tran Mills           a_val++;
86857cbf51SRichard Tran Mills         }
87857cbf51SRichard Tran Mills       }
88857cbf51SRichard Tran Mills     }
89857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
909463ebdaSPierre Jolivet   if (type == NORM_2) {
91ff6a9541SJacob Faibussowitsch     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
92857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
93ff6a9541SJacob Faibussowitsch     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
949463ebdaSPierre Jolivet   }
953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
969463ebdaSPierre Jolivet }
979463ebdaSPierre Jolivet 
9866976f2fSJacob Faibussowitsch static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
99d71ae5a4SJacob Faibussowitsch {
100b01c7715SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data;
101*421480d9SBarry Smith   PetscInt        i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
1027f0c90edSBarry Smith   MatScalar      *v     = a->a, *odiag, *diag, work[25], *v_work;
10362bba022SBarry Smith   PetscReal       shift = 0.0;
1041a9391e3SHong Zhang   PetscBool       allowzeropivot, zeropivotdetected = PETSC_FALSE;
105*421480d9SBarry Smith   const PetscInt *adiag;
106b01c7715SBarry Smith 
107b01c7715SBarry Smith   PetscFunctionBegin;
108a455e926SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
109a455e926SHong Zhang 
1109797317bSBarry Smith   if (a->idiagvalid) {
1119797317bSBarry Smith     if (values) *values = a->idiag;
1123ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1139797317bSBarry Smith   }
114*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, NULL));
1153a7d0413SPierre Jolivet   if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
116b01c7715SBarry Smith   diag = a->idiag;
117bbead8a2SBarry Smith   if (values) *values = a->idiag;
118b01c7715SBarry Smith   /* factor and invert each block */
119521d7252SBarry Smith   switch (bs) {
120ab040260SJed Brown   case 1:
121ab040260SJed Brown     for (i = 0; i < mbs; i++) {
122*421480d9SBarry Smith       odiag   = v + 1 * adiag[i];
123ab040260SJed Brown       diag[0] = odiag[0];
124ec1892c8SHong Zhang 
125ec1892c8SHong Zhang       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
126966bd95aSPierre Jolivet         PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
1277b6c816cSBarry Smith         A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1287b6c816cSBarry Smith         A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
1297b6c816cSBarry Smith         A->factorerror_zeropivot_row   = i;
1309566063dSJacob Faibussowitsch         PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
131ec1892c8SHong Zhang       }
132ec1892c8SHong Zhang 
133d4a378daSJed Brown       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134ab040260SJed Brown       diag += 1;
135ab040260SJed Brown     }
136ab040260SJed Brown     break;
137b01c7715SBarry Smith   case 2:
138b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
139*421480d9SBarry Smith       odiag   = v + 4 * adiag[i];
1409371c9d4SSatish Balay       diag[0] = odiag[0];
1419371c9d4SSatish Balay       diag[1] = odiag[1];
1429371c9d4SSatish Balay       diag[2] = odiag[2];
1439371c9d4SSatish Balay       diag[3] = odiag[3];
1449566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1457b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146b01c7715SBarry Smith       diag += 4;
147b01c7715SBarry Smith     }
148b01c7715SBarry Smith     break;
149b01c7715SBarry Smith   case 3:
150b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
151*421480d9SBarry Smith       odiag   = v + 9 * adiag[i];
1529371c9d4SSatish Balay       diag[0] = odiag[0];
1539371c9d4SSatish Balay       diag[1] = odiag[1];
1549371c9d4SSatish Balay       diag[2] = odiag[2];
1559371c9d4SSatish Balay       diag[3] = odiag[3];
1569371c9d4SSatish Balay       diag[4] = odiag[4];
1579371c9d4SSatish Balay       diag[5] = odiag[5];
1589371c9d4SSatish Balay       diag[6] = odiag[6];
1599371c9d4SSatish Balay       diag[7] = odiag[7];
160b01c7715SBarry Smith       diag[8] = odiag[8];
1619566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
1627b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163b01c7715SBarry Smith       diag += 9;
164b01c7715SBarry Smith     }
165b01c7715SBarry Smith     break;
166b01c7715SBarry Smith   case 4:
167b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
168*421480d9SBarry Smith       odiag = v + 16 * adiag[i];
1699566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 16));
1709566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
1717b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172b01c7715SBarry Smith       diag += 16;
173b01c7715SBarry Smith     }
174b01c7715SBarry Smith     break;
175b01c7715SBarry Smith   case 5:
176b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
177*421480d9SBarry Smith       odiag = v + 25 * adiag[i];
1789566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 25));
1799566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
1807b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181b01c7715SBarry Smith       diag += 25;
182b01c7715SBarry Smith     }
183b01c7715SBarry Smith     break;
184d49b2adcSBarry Smith   case 6:
185d49b2adcSBarry Smith     for (i = 0; i < mbs; i++) {
186*421480d9SBarry Smith       odiag = v + 36 * adiag[i];
1879566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 36));
1889566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
1897b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190d49b2adcSBarry Smith       diag += 36;
191d49b2adcSBarry Smith     }
192d49b2adcSBarry Smith     break;
193de80f912SBarry Smith   case 7:
194de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
195*421480d9SBarry Smith       odiag = v + 49 * adiag[i];
1969566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 49));
1979566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
1987b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199de80f912SBarry Smith       diag += 49;
200de80f912SBarry Smith     }
201de80f912SBarry Smith     break;
202b01c7715SBarry Smith   default:
2039566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
205*421480d9SBarry Smith       odiag = v + bs2 * adiag[i];
2069566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, bs2));
2079566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
2087b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209de80f912SBarry Smith       diag += bs2;
210de80f912SBarry Smith     }
2119566063dSJacob Faibussowitsch     PetscCall(PetscFree2(v_work, v_pivots));
212b01c7715SBarry Smith   }
213b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
2143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
215b01c7715SBarry Smith }
216b01c7715SBarry Smith 
21766976f2fSJacob Faibussowitsch static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218d71ae5a4SJacob Faibussowitsch {
2196d3beeddSMatthew Knepley   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
220e48d15efSToby Isaac   PetscScalar       *x, *work, *w, *workt, *t;
221e48d15efSToby Isaac   const MatScalar   *v, *aa = a->a, *idiag;
222e48d15efSToby Isaac   const PetscScalar *b, *xb;
2235455b99fSToby Isaac   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224e48d15efSToby Isaac   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225c1ac3661SBarry Smith   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
226b01c7715SBarry Smith 
227b01c7715SBarry Smith   PetscFunctionBegin;
228b01c7715SBarry Smith   its = its * lits;
2295f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
2305f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2315f80ce2aSJacob Faibussowitsch   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
2325f80ce2aSJacob Faibussowitsch   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
2335f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
234b01c7715SBarry Smith 
2359566063dSJacob Faibussowitsch   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
236b01c7715SBarry Smith 
2373ba16761SJacob Faibussowitsch   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238b01c7715SBarry Smith   diag  = a->diag;
239b01c7715SBarry Smith   idiag = a->idiag;
240de80f912SBarry Smith   k     = PetscMax(A->rmap->n, A->cmap->n);
24148a46eb9SPierre Jolivet   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
24248a46eb9SPierre Jolivet   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
24348a46eb9SPierre Jolivet   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
2443475c22fSBarry Smith   work = a->mult_work;
2453475c22fSBarry Smith   t    = a->sor_workt;
246de80f912SBarry Smith   w    = a->sor_work;
247de80f912SBarry Smith 
2489566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
2499566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
250de80f912SBarry Smith 
251de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
252de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253e48d15efSToby Isaac       switch (bs) {
254e48d15efSToby Isaac       case 1:
255e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256e48d15efSToby Isaac         t[0] = b[0];
257e48d15efSToby Isaac         i2   = 1;
258e48d15efSToby Isaac         idiag += 1;
259e48d15efSToby Isaac         for (i = 1; i < m; i++) {
260e48d15efSToby Isaac           v    = aa + ai[i];
261e48d15efSToby Isaac           vi   = aj + ai[i];
262e48d15efSToby Isaac           nz   = diag[i] - ai[i];
263e48d15efSToby Isaac           s[0] = b[i2];
264e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
265e48d15efSToby Isaac             xw[0] = x[vi[j]];
266e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267e48d15efSToby Isaac           }
268e48d15efSToby Isaac           t[i2] = s[0];
269e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270e48d15efSToby Isaac           x[i2] = xw[0];
271e48d15efSToby Isaac           idiag += 1;
272e48d15efSToby Isaac           i2 += 1;
273e48d15efSToby Isaac         }
274e48d15efSToby Isaac         break;
275e48d15efSToby Isaac       case 2:
276e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
2779371c9d4SSatish Balay         t[0] = b[0];
2789371c9d4SSatish Balay         t[1] = b[1];
279e48d15efSToby Isaac         i2   = 2;
280e48d15efSToby Isaac         idiag += 4;
281e48d15efSToby Isaac         for (i = 1; i < m; i++) {
282e48d15efSToby Isaac           v    = aa + 4 * ai[i];
283e48d15efSToby Isaac           vi   = aj + ai[i];
284e48d15efSToby Isaac           nz   = diag[i] - ai[i];
2859371c9d4SSatish Balay           s[0] = b[i2];
2869371c9d4SSatish Balay           s[1] = b[i2 + 1];
287e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
288e48d15efSToby Isaac             idx   = 2 * vi[j];
289e48d15efSToby Isaac             it    = 4 * j;
2909371c9d4SSatish Balay             xw[0] = x[idx];
2919371c9d4SSatish Balay             xw[1] = x[1 + idx];
292e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293e48d15efSToby Isaac           }
2949371c9d4SSatish Balay           t[i2]     = s[0];
2959371c9d4SSatish Balay           t[i2 + 1] = s[1];
296e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
2979371c9d4SSatish Balay           x[i2]     = xw[0];
2989371c9d4SSatish Balay           x[i2 + 1] = xw[1];
299e48d15efSToby Isaac           idiag += 4;
300e48d15efSToby Isaac           i2 += 2;
301e48d15efSToby Isaac         }
302e48d15efSToby Isaac         break;
303e48d15efSToby Isaac       case 3:
304e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
3059371c9d4SSatish Balay         t[0] = b[0];
3069371c9d4SSatish Balay         t[1] = b[1];
3079371c9d4SSatish Balay         t[2] = b[2];
308e48d15efSToby Isaac         i2   = 3;
309e48d15efSToby Isaac         idiag += 9;
310e48d15efSToby Isaac         for (i = 1; i < m; i++) {
311e48d15efSToby Isaac           v    = aa + 9 * ai[i];
312e48d15efSToby Isaac           vi   = aj + ai[i];
313e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3149371c9d4SSatish Balay           s[0] = b[i2];
3159371c9d4SSatish Balay           s[1] = b[i2 + 1];
3169371c9d4SSatish Balay           s[2] = b[i2 + 2];
317e48d15efSToby Isaac           while (nz--) {
318e48d15efSToby Isaac             idx   = 3 * (*vi++);
3199371c9d4SSatish Balay             xw[0] = x[idx];
3209371c9d4SSatish Balay             xw[1] = x[1 + idx];
3219371c9d4SSatish Balay             xw[2] = x[2 + idx];
322e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323e48d15efSToby Isaac             v += 9;
324e48d15efSToby Isaac           }
3259371c9d4SSatish Balay           t[i2]     = s[0];
3269371c9d4SSatish Balay           t[i2 + 1] = s[1];
3279371c9d4SSatish Balay           t[i2 + 2] = s[2];
328e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
3299371c9d4SSatish Balay           x[i2]     = xw[0];
3309371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3319371c9d4SSatish Balay           x[i2 + 2] = xw[2];
332e48d15efSToby Isaac           idiag += 9;
333e48d15efSToby Isaac           i2 += 3;
334e48d15efSToby Isaac         }
335e48d15efSToby Isaac         break;
336e48d15efSToby Isaac       case 4:
337e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
3389371c9d4SSatish Balay         t[0] = b[0];
3399371c9d4SSatish Balay         t[1] = b[1];
3409371c9d4SSatish Balay         t[2] = b[2];
3419371c9d4SSatish Balay         t[3] = b[3];
342e48d15efSToby Isaac         i2   = 4;
343e48d15efSToby Isaac         idiag += 16;
344e48d15efSToby Isaac         for (i = 1; i < m; i++) {
345e48d15efSToby Isaac           v    = aa + 16 * ai[i];
346e48d15efSToby Isaac           vi   = aj + ai[i];
347e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3489371c9d4SSatish Balay           s[0] = b[i2];
3499371c9d4SSatish Balay           s[1] = b[i2 + 1];
3509371c9d4SSatish Balay           s[2] = b[i2 + 2];
3519371c9d4SSatish Balay           s[3] = b[i2 + 3];
352e48d15efSToby Isaac           while (nz--) {
353e48d15efSToby Isaac             idx   = 4 * (*vi++);
3549371c9d4SSatish Balay             xw[0] = x[idx];
3559371c9d4SSatish Balay             xw[1] = x[1 + idx];
3569371c9d4SSatish Balay             xw[2] = x[2 + idx];
3579371c9d4SSatish Balay             xw[3] = x[3 + idx];
358e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359e48d15efSToby Isaac             v += 16;
360e48d15efSToby Isaac           }
3619371c9d4SSatish Balay           t[i2]     = s[0];
3629371c9d4SSatish Balay           t[i2 + 1] = s[1];
3639371c9d4SSatish Balay           t[i2 + 2] = s[2];
3649371c9d4SSatish Balay           t[i2 + 3] = s[3];
365e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
3669371c9d4SSatish Balay           x[i2]     = xw[0];
3679371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3689371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3699371c9d4SSatish Balay           x[i2 + 3] = xw[3];
370e48d15efSToby Isaac           idiag += 16;
371e48d15efSToby Isaac           i2 += 4;
372e48d15efSToby Isaac         }
373e48d15efSToby Isaac         break;
374e48d15efSToby Isaac       case 5:
375e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
3769371c9d4SSatish Balay         t[0] = b[0];
3779371c9d4SSatish Balay         t[1] = b[1];
3789371c9d4SSatish Balay         t[2] = b[2];
3799371c9d4SSatish Balay         t[3] = b[3];
3809371c9d4SSatish Balay         t[4] = b[4];
381e48d15efSToby Isaac         i2   = 5;
382e48d15efSToby Isaac         idiag += 25;
383e48d15efSToby Isaac         for (i = 1; i < m; i++) {
384e48d15efSToby Isaac           v    = aa + 25 * ai[i];
385e48d15efSToby Isaac           vi   = aj + ai[i];
386e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3879371c9d4SSatish Balay           s[0] = b[i2];
3889371c9d4SSatish Balay           s[1] = b[i2 + 1];
3899371c9d4SSatish Balay           s[2] = b[i2 + 2];
3909371c9d4SSatish Balay           s[3] = b[i2 + 3];
3919371c9d4SSatish Balay           s[4] = b[i2 + 4];
392e48d15efSToby Isaac           while (nz--) {
393e48d15efSToby Isaac             idx   = 5 * (*vi++);
3949371c9d4SSatish Balay             xw[0] = x[idx];
3959371c9d4SSatish Balay             xw[1] = x[1 + idx];
3969371c9d4SSatish Balay             xw[2] = x[2 + idx];
3979371c9d4SSatish Balay             xw[3] = x[3 + idx];
3989371c9d4SSatish Balay             xw[4] = x[4 + idx];
399e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400e48d15efSToby Isaac             v += 25;
401e48d15efSToby Isaac           }
4029371c9d4SSatish Balay           t[i2]     = s[0];
4039371c9d4SSatish Balay           t[i2 + 1] = s[1];
4049371c9d4SSatish Balay           t[i2 + 2] = s[2];
4059371c9d4SSatish Balay           t[i2 + 3] = s[3];
4069371c9d4SSatish Balay           t[i2 + 4] = s[4];
407e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
4089371c9d4SSatish Balay           x[i2]     = xw[0];
4099371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4109371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4119371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4129371c9d4SSatish Balay           x[i2 + 4] = xw[4];
413e48d15efSToby Isaac           idiag += 25;
414e48d15efSToby Isaac           i2 += 5;
415e48d15efSToby Isaac         }
416e48d15efSToby Isaac         break;
417e48d15efSToby Isaac       case 6:
418e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
4199371c9d4SSatish Balay         t[0] = b[0];
4209371c9d4SSatish Balay         t[1] = b[1];
4219371c9d4SSatish Balay         t[2] = b[2];
4229371c9d4SSatish Balay         t[3] = b[3];
4239371c9d4SSatish Balay         t[4] = b[4];
4249371c9d4SSatish Balay         t[5] = b[5];
425e48d15efSToby Isaac         i2   = 6;
426e48d15efSToby Isaac         idiag += 36;
427e48d15efSToby Isaac         for (i = 1; i < m; i++) {
428e48d15efSToby Isaac           v    = aa + 36 * ai[i];
429e48d15efSToby Isaac           vi   = aj + ai[i];
430e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4319371c9d4SSatish Balay           s[0] = b[i2];
4329371c9d4SSatish Balay           s[1] = b[i2 + 1];
4339371c9d4SSatish Balay           s[2] = b[i2 + 2];
4349371c9d4SSatish Balay           s[3] = b[i2 + 3];
4359371c9d4SSatish Balay           s[4] = b[i2 + 4];
4369371c9d4SSatish Balay           s[5] = b[i2 + 5];
437e48d15efSToby Isaac           while (nz--) {
438e48d15efSToby Isaac             idx   = 6 * (*vi++);
4399371c9d4SSatish Balay             xw[0] = x[idx];
4409371c9d4SSatish Balay             xw[1] = x[1 + idx];
4419371c9d4SSatish Balay             xw[2] = x[2 + idx];
4429371c9d4SSatish Balay             xw[3] = x[3 + idx];
4439371c9d4SSatish Balay             xw[4] = x[4 + idx];
4449371c9d4SSatish Balay             xw[5] = x[5 + idx];
445e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446e48d15efSToby Isaac             v += 36;
447e48d15efSToby Isaac           }
4489371c9d4SSatish Balay           t[i2]     = s[0];
4499371c9d4SSatish Balay           t[i2 + 1] = s[1];
4509371c9d4SSatish Balay           t[i2 + 2] = s[2];
4519371c9d4SSatish Balay           t[i2 + 3] = s[3];
4529371c9d4SSatish Balay           t[i2 + 4] = s[4];
4539371c9d4SSatish Balay           t[i2 + 5] = s[5];
454e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
4559371c9d4SSatish Balay           x[i2]     = xw[0];
4569371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4579371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4589371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4599371c9d4SSatish Balay           x[i2 + 4] = xw[4];
4609371c9d4SSatish Balay           x[i2 + 5] = xw[5];
461e48d15efSToby Isaac           idiag += 36;
462e48d15efSToby Isaac           i2 += 6;
463e48d15efSToby Isaac         }
464e48d15efSToby Isaac         break;
465e48d15efSToby Isaac       case 7:
466e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
4679371c9d4SSatish Balay         t[0] = b[0];
4689371c9d4SSatish Balay         t[1] = b[1];
4699371c9d4SSatish Balay         t[2] = b[2];
4709371c9d4SSatish Balay         t[3] = b[3];
4719371c9d4SSatish Balay         t[4] = b[4];
4729371c9d4SSatish Balay         t[5] = b[5];
4739371c9d4SSatish Balay         t[6] = b[6];
474e48d15efSToby Isaac         i2   = 7;
475e48d15efSToby Isaac         idiag += 49;
476e48d15efSToby Isaac         for (i = 1; i < m; i++) {
477e48d15efSToby Isaac           v    = aa + 49 * ai[i];
478e48d15efSToby Isaac           vi   = aj + ai[i];
479e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4809371c9d4SSatish Balay           s[0] = b[i2];
4819371c9d4SSatish Balay           s[1] = b[i2 + 1];
4829371c9d4SSatish Balay           s[2] = b[i2 + 2];
4839371c9d4SSatish Balay           s[3] = b[i2 + 3];
4849371c9d4SSatish Balay           s[4] = b[i2 + 4];
4859371c9d4SSatish Balay           s[5] = b[i2 + 5];
4869371c9d4SSatish Balay           s[6] = b[i2 + 6];
487e48d15efSToby Isaac           while (nz--) {
488e48d15efSToby Isaac             idx   = 7 * (*vi++);
4899371c9d4SSatish Balay             xw[0] = x[idx];
4909371c9d4SSatish Balay             xw[1] = x[1 + idx];
4919371c9d4SSatish Balay             xw[2] = x[2 + idx];
4929371c9d4SSatish Balay             xw[3] = x[3 + idx];
4939371c9d4SSatish Balay             xw[4] = x[4 + idx];
4949371c9d4SSatish Balay             xw[5] = x[5 + idx];
4959371c9d4SSatish Balay             xw[6] = x[6 + idx];
496e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497e48d15efSToby Isaac             v += 49;
498e48d15efSToby Isaac           }
4999371c9d4SSatish Balay           t[i2]     = s[0];
5009371c9d4SSatish Balay           t[i2 + 1] = s[1];
5019371c9d4SSatish Balay           t[i2 + 2] = s[2];
5029371c9d4SSatish Balay           t[i2 + 3] = s[3];
5039371c9d4SSatish Balay           t[i2 + 4] = s[4];
5049371c9d4SSatish Balay           t[i2 + 5] = s[5];
5059371c9d4SSatish Balay           t[i2 + 6] = s[6];
506e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
5079371c9d4SSatish Balay           x[i2]     = xw[0];
5089371c9d4SSatish Balay           x[i2 + 1] = xw[1];
5099371c9d4SSatish Balay           x[i2 + 2] = xw[2];
5109371c9d4SSatish Balay           x[i2 + 3] = xw[3];
5119371c9d4SSatish Balay           x[i2 + 4] = xw[4];
5129371c9d4SSatish Balay           x[i2 + 5] = xw[5];
5139371c9d4SSatish Balay           x[i2 + 6] = xw[6];
514e48d15efSToby Isaac           idiag += 49;
515e48d15efSToby Isaac           i2 += 7;
516e48d15efSToby Isaac         }
517e48d15efSToby Isaac         break;
518e48d15efSToby Isaac       default:
51996b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
5209566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(t, b, bs));
521de80f912SBarry Smith         i2 = bs;
522de80f912SBarry Smith         idiag += bs2;
523de80f912SBarry Smith         for (i = 1; i < m; i++) {
524de80f912SBarry Smith           v  = aa + bs2 * ai[i];
525de80f912SBarry Smith           vi = aj + ai[i];
526de80f912SBarry Smith           nz = diag[i] - ai[i];
527de80f912SBarry Smith 
5289566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
529de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
530de80f912SBarry Smith           workt = work;
531de80f912SBarry Smith           for (j = 0; j < nz; j++) {
5329566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533de80f912SBarry Smith             workt += bs;
534de80f912SBarry Smith           }
53596b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
5369566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(t + i2, w, bs));
53796b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
538de80f912SBarry Smith 
539de80f912SBarry Smith           idiag += bs2;
540de80f912SBarry Smith           i2 += bs;
541de80f912SBarry Smith         }
542e48d15efSToby Isaac         break;
543e48d15efSToby Isaac       }
544de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
5459566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546e48d15efSToby Isaac       xb = t;
5479371c9d4SSatish Balay     } else xb = b;
548de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
550e48d15efSToby Isaac       i2    = bs * (m - 1);
551e48d15efSToby Isaac       switch (bs) {
552e48d15efSToby Isaac       case 1:
553e48d15efSToby Isaac         s[0] = xb[i2];
554e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555e48d15efSToby Isaac         x[i2] = xw[0];
556e48d15efSToby Isaac         i2 -= 1;
557e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
558e48d15efSToby Isaac           v    = aa + (diag[i] + 1);
559e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
560e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
561e48d15efSToby Isaac           s[0] = xb[i2];
562e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
563e48d15efSToby Isaac             xw[0] = x[vi[j]];
564e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565e48d15efSToby Isaac           }
566e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567e48d15efSToby Isaac           x[i2] = xw[0];
568e48d15efSToby Isaac           idiag -= 1;
569e48d15efSToby Isaac           i2 -= 1;
570e48d15efSToby Isaac         }
571e48d15efSToby Isaac         break;
572e48d15efSToby Isaac       case 2:
5739371c9d4SSatish Balay         s[0] = xb[i2];
5749371c9d4SSatish Balay         s[1] = xb[i2 + 1];
575e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5769371c9d4SSatish Balay         x[i2]     = xw[0];
5779371c9d4SSatish Balay         x[i2 + 1] = xw[1];
578e48d15efSToby Isaac         i2 -= 2;
579e48d15efSToby Isaac         idiag -= 4;
580e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
581e48d15efSToby Isaac           v    = aa + 4 * (diag[i] + 1);
582e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
583e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
5849371c9d4SSatish Balay           s[0] = xb[i2];
5859371c9d4SSatish Balay           s[1] = xb[i2 + 1];
586e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
587e48d15efSToby Isaac             idx   = 2 * vi[j];
588e48d15efSToby Isaac             it    = 4 * j;
5899371c9d4SSatish Balay             xw[0] = x[idx];
5909371c9d4SSatish Balay             xw[1] = x[1 + idx];
591e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592e48d15efSToby Isaac           }
593e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5949371c9d4SSatish Balay           x[i2]     = xw[0];
5959371c9d4SSatish Balay           x[i2 + 1] = xw[1];
596e48d15efSToby Isaac           idiag -= 4;
597e48d15efSToby Isaac           i2 -= 2;
598e48d15efSToby Isaac         }
599e48d15efSToby Isaac         break;
600e48d15efSToby Isaac       case 3:
6019371c9d4SSatish Balay         s[0] = xb[i2];
6029371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6039371c9d4SSatish Balay         s[2] = xb[i2 + 2];
604e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6059371c9d4SSatish Balay         x[i2]     = xw[0];
6069371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6079371c9d4SSatish Balay         x[i2 + 2] = xw[2];
608e48d15efSToby Isaac         i2 -= 3;
609e48d15efSToby Isaac         idiag -= 9;
610e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
611e48d15efSToby Isaac           v    = aa + 9 * (diag[i] + 1);
612e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
613e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6149371c9d4SSatish Balay           s[0] = xb[i2];
6159371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6169371c9d4SSatish Balay           s[2] = xb[i2 + 2];
617e48d15efSToby Isaac           while (nz--) {
618e48d15efSToby Isaac             idx   = 3 * (*vi++);
6199371c9d4SSatish Balay             xw[0] = x[idx];
6209371c9d4SSatish Balay             xw[1] = x[1 + idx];
6219371c9d4SSatish Balay             xw[2] = x[2 + idx];
622e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623e48d15efSToby Isaac             v += 9;
624e48d15efSToby Isaac           }
625e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6269371c9d4SSatish Balay           x[i2]     = xw[0];
6279371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6289371c9d4SSatish Balay           x[i2 + 2] = xw[2];
629e48d15efSToby Isaac           idiag -= 9;
630e48d15efSToby Isaac           i2 -= 3;
631e48d15efSToby Isaac         }
632e48d15efSToby Isaac         break;
633e48d15efSToby Isaac       case 4:
6349371c9d4SSatish Balay         s[0] = xb[i2];
6359371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6369371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6379371c9d4SSatish Balay         s[3] = xb[i2 + 3];
638e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6399371c9d4SSatish Balay         x[i2]     = xw[0];
6409371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6419371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6429371c9d4SSatish Balay         x[i2 + 3] = xw[3];
643e48d15efSToby Isaac         i2 -= 4;
644e48d15efSToby Isaac         idiag -= 16;
645e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
646e48d15efSToby Isaac           v    = aa + 16 * (diag[i] + 1);
647e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
648e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6499371c9d4SSatish Balay           s[0] = xb[i2];
6509371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6519371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6529371c9d4SSatish Balay           s[3] = xb[i2 + 3];
653e48d15efSToby Isaac           while (nz--) {
654e48d15efSToby Isaac             idx   = 4 * (*vi++);
6559371c9d4SSatish Balay             xw[0] = x[idx];
6569371c9d4SSatish Balay             xw[1] = x[1 + idx];
6579371c9d4SSatish Balay             xw[2] = x[2 + idx];
6589371c9d4SSatish Balay             xw[3] = x[3 + idx];
659e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660e48d15efSToby Isaac             v += 16;
661e48d15efSToby Isaac           }
662e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6639371c9d4SSatish Balay           x[i2]     = xw[0];
6649371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6659371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6669371c9d4SSatish Balay           x[i2 + 3] = xw[3];
667e48d15efSToby Isaac           idiag -= 16;
668e48d15efSToby Isaac           i2 -= 4;
669e48d15efSToby Isaac         }
670e48d15efSToby Isaac         break;
671e48d15efSToby Isaac       case 5:
6729371c9d4SSatish Balay         s[0] = xb[i2];
6739371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6749371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6759371c9d4SSatish Balay         s[3] = xb[i2 + 3];
6769371c9d4SSatish Balay         s[4] = xb[i2 + 4];
677e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6789371c9d4SSatish Balay         x[i2]     = xw[0];
6799371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6809371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6819371c9d4SSatish Balay         x[i2 + 3] = xw[3];
6829371c9d4SSatish Balay         x[i2 + 4] = xw[4];
683e48d15efSToby Isaac         i2 -= 5;
684e48d15efSToby Isaac         idiag -= 25;
685e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
686e48d15efSToby Isaac           v    = aa + 25 * (diag[i] + 1);
687e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
688e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6899371c9d4SSatish Balay           s[0] = xb[i2];
6909371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6919371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6929371c9d4SSatish Balay           s[3] = xb[i2 + 3];
6939371c9d4SSatish Balay           s[4] = xb[i2 + 4];
694e48d15efSToby Isaac           while (nz--) {
695e48d15efSToby Isaac             idx   = 5 * (*vi++);
6969371c9d4SSatish Balay             xw[0] = x[idx];
6979371c9d4SSatish Balay             xw[1] = x[1 + idx];
6989371c9d4SSatish Balay             xw[2] = x[2 + idx];
6999371c9d4SSatish Balay             xw[3] = x[3 + idx];
7009371c9d4SSatish Balay             xw[4] = x[4 + idx];
701e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702e48d15efSToby Isaac             v += 25;
703e48d15efSToby Isaac           }
704e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
7059371c9d4SSatish Balay           x[i2]     = xw[0];
7069371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7079371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7089371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7099371c9d4SSatish Balay           x[i2 + 4] = xw[4];
710e48d15efSToby Isaac           idiag -= 25;
711e48d15efSToby Isaac           i2 -= 5;
712e48d15efSToby Isaac         }
713e48d15efSToby Isaac         break;
714e48d15efSToby Isaac       case 6:
7159371c9d4SSatish Balay         s[0] = xb[i2];
7169371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7179371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7189371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7199371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7209371c9d4SSatish Balay         s[5] = xb[i2 + 5];
721e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7229371c9d4SSatish Balay         x[i2]     = xw[0];
7239371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7249371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7259371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7269371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7279371c9d4SSatish Balay         x[i2 + 5] = xw[5];
728e48d15efSToby Isaac         i2 -= 6;
729e48d15efSToby Isaac         idiag -= 36;
730e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
731e48d15efSToby Isaac           v    = aa + 36 * (diag[i] + 1);
732e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
733e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7349371c9d4SSatish Balay           s[0] = xb[i2];
7359371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7369371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7379371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7389371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7399371c9d4SSatish Balay           s[5] = xb[i2 + 5];
740e48d15efSToby Isaac           while (nz--) {
741e48d15efSToby Isaac             idx   = 6 * (*vi++);
7429371c9d4SSatish Balay             xw[0] = x[idx];
7439371c9d4SSatish Balay             xw[1] = x[1 + idx];
7449371c9d4SSatish Balay             xw[2] = x[2 + idx];
7459371c9d4SSatish Balay             xw[3] = x[3 + idx];
7469371c9d4SSatish Balay             xw[4] = x[4 + idx];
7479371c9d4SSatish Balay             xw[5] = x[5 + idx];
748e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749e48d15efSToby Isaac             v += 36;
750e48d15efSToby Isaac           }
751e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7529371c9d4SSatish Balay           x[i2]     = xw[0];
7539371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7549371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7559371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7569371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7579371c9d4SSatish Balay           x[i2 + 5] = xw[5];
758e48d15efSToby Isaac           idiag -= 36;
759e48d15efSToby Isaac           i2 -= 6;
760e48d15efSToby Isaac         }
761e48d15efSToby Isaac         break;
762e48d15efSToby Isaac       case 7:
7639371c9d4SSatish Balay         s[0] = xb[i2];
7649371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7659371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7669371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7679371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7689371c9d4SSatish Balay         s[5] = xb[i2 + 5];
7699371c9d4SSatish Balay         s[6] = xb[i2 + 6];
770e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
7719371c9d4SSatish Balay         x[i2]     = xw[0];
7729371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7739371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7749371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7759371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7769371c9d4SSatish Balay         x[i2 + 5] = xw[5];
7779371c9d4SSatish Balay         x[i2 + 6] = xw[6];
778e48d15efSToby Isaac         i2 -= 7;
779e48d15efSToby Isaac         idiag -= 49;
780e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
781e48d15efSToby Isaac           v    = aa + 49 * (diag[i] + 1);
782e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
783e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7849371c9d4SSatish Balay           s[0] = xb[i2];
7859371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7869371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7879371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7889371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7899371c9d4SSatish Balay           s[5] = xb[i2 + 5];
7909371c9d4SSatish Balay           s[6] = xb[i2 + 6];
791e48d15efSToby Isaac           while (nz--) {
792e48d15efSToby Isaac             idx   = 7 * (*vi++);
7939371c9d4SSatish Balay             xw[0] = x[idx];
7949371c9d4SSatish Balay             xw[1] = x[1 + idx];
7959371c9d4SSatish Balay             xw[2] = x[2 + idx];
7969371c9d4SSatish Balay             xw[3] = x[3 + idx];
7979371c9d4SSatish Balay             xw[4] = x[4 + idx];
7989371c9d4SSatish Balay             xw[5] = x[5 + idx];
7999371c9d4SSatish Balay             xw[6] = x[6 + idx];
800e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801e48d15efSToby Isaac             v += 49;
802e48d15efSToby Isaac           }
803e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
8049371c9d4SSatish Balay           x[i2]     = xw[0];
8059371c9d4SSatish Balay           x[i2 + 1] = xw[1];
8069371c9d4SSatish Balay           x[i2 + 2] = xw[2];
8079371c9d4SSatish Balay           x[i2 + 3] = xw[3];
8089371c9d4SSatish Balay           x[i2 + 4] = xw[4];
8099371c9d4SSatish Balay           x[i2 + 5] = xw[5];
8109371c9d4SSatish Balay           x[i2 + 6] = xw[6];
811e48d15efSToby Isaac           idiag -= 49;
812e48d15efSToby Isaac           i2 -= 7;
813e48d15efSToby Isaac         }
814e48d15efSToby Isaac         break;
815e48d15efSToby Isaac       default:
8169566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(w, xb + i2, bs));
81796b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818de80f912SBarry Smith         i2 -= bs;
819e48d15efSToby Isaac         idiag -= bs2;
820de80f912SBarry Smith         for (i = m - 2; i >= 0; i--) {
821de80f912SBarry Smith           v  = aa + bs2 * (diag[i] + 1);
822de80f912SBarry Smith           vi = aj + diag[i] + 1;
823de80f912SBarry Smith           nz = ai[i + 1] - diag[i] - 1;
824de80f912SBarry Smith 
8259566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, xb + i2, bs));
826de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
827de80f912SBarry Smith           workt = work;
828de80f912SBarry Smith           for (j = 0; j < nz; j++) {
8299566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830de80f912SBarry Smith             workt += bs;
831de80f912SBarry Smith           }
83296b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
83396b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
834e48d15efSToby Isaac 
835de80f912SBarry Smith           idiag -= bs2;
836de80f912SBarry Smith           i2 -= bs;
837de80f912SBarry Smith         }
838e48d15efSToby Isaac         break;
839e48d15efSToby Isaac       }
8409566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841de80f912SBarry Smith     }
842e48d15efSToby Isaac     its--;
843e48d15efSToby Isaac   }
844e48d15efSToby Isaac   while (its--) {
845e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846e48d15efSToby Isaac       idiag = a->idiag;
847e48d15efSToby Isaac       i2    = 0;
848e48d15efSToby Isaac       switch (bs) {
849e48d15efSToby Isaac       case 1:
850e48d15efSToby Isaac         for (i = 0; i < m; i++) {
851e48d15efSToby Isaac           v    = aa + ai[i];
852e48d15efSToby Isaac           vi   = aj + ai[i];
853e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
854e48d15efSToby Isaac           s[0] = b[i2];
855e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
856e48d15efSToby Isaac             xw[0] = x[vi[j]];
857e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858e48d15efSToby Isaac           }
859e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860e48d15efSToby Isaac           x[i2] += xw[0];
861e48d15efSToby Isaac           idiag += 1;
862e48d15efSToby Isaac           i2 += 1;
863e48d15efSToby Isaac         }
864e48d15efSToby Isaac         break;
865e48d15efSToby Isaac       case 2:
866e48d15efSToby Isaac         for (i = 0; i < m; i++) {
867e48d15efSToby Isaac           v    = aa + 4 * ai[i];
868e48d15efSToby Isaac           vi   = aj + ai[i];
869e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8709371c9d4SSatish Balay           s[0] = b[i2];
8719371c9d4SSatish Balay           s[1] = b[i2 + 1];
872e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
873e48d15efSToby Isaac             idx   = 2 * vi[j];
874e48d15efSToby Isaac             it    = 4 * j;
8759371c9d4SSatish Balay             xw[0] = x[idx];
8769371c9d4SSatish Balay             xw[1] = x[1 + idx];
877e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878e48d15efSToby Isaac           }
879e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
8809371c9d4SSatish Balay           x[i2] += xw[0];
8819371c9d4SSatish Balay           x[i2 + 1] += xw[1];
882e48d15efSToby Isaac           idiag += 4;
883e48d15efSToby Isaac           i2 += 2;
884e48d15efSToby Isaac         }
885e48d15efSToby Isaac         break;
886e48d15efSToby Isaac       case 3:
887e48d15efSToby Isaac         for (i = 0; i < m; i++) {
888e48d15efSToby Isaac           v    = aa + 9 * ai[i];
889e48d15efSToby Isaac           vi   = aj + ai[i];
890e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8919371c9d4SSatish Balay           s[0] = b[i2];
8929371c9d4SSatish Balay           s[1] = b[i2 + 1];
8939371c9d4SSatish Balay           s[2] = b[i2 + 2];
894e48d15efSToby Isaac           while (nz--) {
895e48d15efSToby Isaac             idx   = 3 * (*vi++);
8969371c9d4SSatish Balay             xw[0] = x[idx];
8979371c9d4SSatish Balay             xw[1] = x[1 + idx];
8989371c9d4SSatish Balay             xw[2] = x[2 + idx];
899e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900e48d15efSToby Isaac             v += 9;
901e48d15efSToby Isaac           }
902e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
9039371c9d4SSatish Balay           x[i2] += xw[0];
9049371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9059371c9d4SSatish Balay           x[i2 + 2] += xw[2];
906e48d15efSToby Isaac           idiag += 9;
907e48d15efSToby Isaac           i2 += 3;
908e48d15efSToby Isaac         }
909e48d15efSToby Isaac         break;
910e48d15efSToby Isaac       case 4:
911e48d15efSToby Isaac         for (i = 0; i < m; i++) {
912e48d15efSToby Isaac           v    = aa + 16 * ai[i];
913e48d15efSToby Isaac           vi   = aj + ai[i];
914e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9159371c9d4SSatish Balay           s[0] = b[i2];
9169371c9d4SSatish Balay           s[1] = b[i2 + 1];
9179371c9d4SSatish Balay           s[2] = b[i2 + 2];
9189371c9d4SSatish Balay           s[3] = b[i2 + 3];
919e48d15efSToby Isaac           while (nz--) {
920e48d15efSToby Isaac             idx   = 4 * (*vi++);
9219371c9d4SSatish Balay             xw[0] = x[idx];
9229371c9d4SSatish Balay             xw[1] = x[1 + idx];
9239371c9d4SSatish Balay             xw[2] = x[2 + idx];
9249371c9d4SSatish Balay             xw[3] = x[3 + idx];
925e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926e48d15efSToby Isaac             v += 16;
927e48d15efSToby Isaac           }
928e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
9299371c9d4SSatish Balay           x[i2] += xw[0];
9309371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9319371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9329371c9d4SSatish Balay           x[i2 + 3] += xw[3];
933e48d15efSToby Isaac           idiag += 16;
934e48d15efSToby Isaac           i2 += 4;
935e48d15efSToby Isaac         }
936e48d15efSToby Isaac         break;
937e48d15efSToby Isaac       case 5:
938e48d15efSToby Isaac         for (i = 0; i < m; i++) {
939e48d15efSToby Isaac           v    = aa + 25 * ai[i];
940e48d15efSToby Isaac           vi   = aj + ai[i];
941e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9429371c9d4SSatish Balay           s[0] = b[i2];
9439371c9d4SSatish Balay           s[1] = b[i2 + 1];
9449371c9d4SSatish Balay           s[2] = b[i2 + 2];
9459371c9d4SSatish Balay           s[3] = b[i2 + 3];
9469371c9d4SSatish Balay           s[4] = b[i2 + 4];
947e48d15efSToby Isaac           while (nz--) {
948e48d15efSToby Isaac             idx   = 5 * (*vi++);
9499371c9d4SSatish Balay             xw[0] = x[idx];
9509371c9d4SSatish Balay             xw[1] = x[1 + idx];
9519371c9d4SSatish Balay             xw[2] = x[2 + idx];
9529371c9d4SSatish Balay             xw[3] = x[3 + idx];
9539371c9d4SSatish Balay             xw[4] = x[4 + idx];
954e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955e48d15efSToby Isaac             v += 25;
956e48d15efSToby Isaac           }
957e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
9589371c9d4SSatish Balay           x[i2] += xw[0];
9599371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9609371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9619371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9629371c9d4SSatish Balay           x[i2 + 4] += xw[4];
963e48d15efSToby Isaac           idiag += 25;
964e48d15efSToby Isaac           i2 += 5;
965e48d15efSToby Isaac         }
966e48d15efSToby Isaac         break;
967e48d15efSToby Isaac       case 6:
968e48d15efSToby Isaac         for (i = 0; i < m; i++) {
969e48d15efSToby Isaac           v    = aa + 36 * ai[i];
970e48d15efSToby Isaac           vi   = aj + ai[i];
971e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9729371c9d4SSatish Balay           s[0] = b[i2];
9739371c9d4SSatish Balay           s[1] = b[i2 + 1];
9749371c9d4SSatish Balay           s[2] = b[i2 + 2];
9759371c9d4SSatish Balay           s[3] = b[i2 + 3];
9769371c9d4SSatish Balay           s[4] = b[i2 + 4];
9779371c9d4SSatish Balay           s[5] = b[i2 + 5];
978e48d15efSToby Isaac           while (nz--) {
979e48d15efSToby Isaac             idx   = 6 * (*vi++);
9809371c9d4SSatish Balay             xw[0] = x[idx];
9819371c9d4SSatish Balay             xw[1] = x[1 + idx];
9829371c9d4SSatish Balay             xw[2] = x[2 + idx];
9839371c9d4SSatish Balay             xw[3] = x[3 + idx];
9849371c9d4SSatish Balay             xw[4] = x[4 + idx];
9859371c9d4SSatish Balay             xw[5] = x[5 + idx];
986e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987e48d15efSToby Isaac             v += 36;
988e48d15efSToby Isaac           }
989e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
9909371c9d4SSatish Balay           x[i2] += xw[0];
9919371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9929371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9939371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9949371c9d4SSatish Balay           x[i2 + 4] += xw[4];
9959371c9d4SSatish Balay           x[i2 + 5] += xw[5];
996e48d15efSToby Isaac           idiag += 36;
997e48d15efSToby Isaac           i2 += 6;
998e48d15efSToby Isaac         }
999e48d15efSToby Isaac         break;
1000e48d15efSToby Isaac       case 7:
1001e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1002e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1003e48d15efSToby Isaac           vi   = aj + ai[i];
1004e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10059371c9d4SSatish Balay           s[0] = b[i2];
10069371c9d4SSatish Balay           s[1] = b[i2 + 1];
10079371c9d4SSatish Balay           s[2] = b[i2 + 2];
10089371c9d4SSatish Balay           s[3] = b[i2 + 3];
10099371c9d4SSatish Balay           s[4] = b[i2 + 4];
10109371c9d4SSatish Balay           s[5] = b[i2 + 5];
10119371c9d4SSatish Balay           s[6] = b[i2 + 6];
1012e48d15efSToby Isaac           while (nz--) {
1013e48d15efSToby Isaac             idx   = 7 * (*vi++);
10149371c9d4SSatish Balay             xw[0] = x[idx];
10159371c9d4SSatish Balay             xw[1] = x[1 + idx];
10169371c9d4SSatish Balay             xw[2] = x[2 + idx];
10179371c9d4SSatish Balay             xw[3] = x[3 + idx];
10189371c9d4SSatish Balay             xw[4] = x[4 + idx];
10199371c9d4SSatish Balay             xw[5] = x[5 + idx];
10209371c9d4SSatish Balay             xw[6] = x[6 + idx];
1021e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022e48d15efSToby Isaac             v += 49;
1023e48d15efSToby Isaac           }
1024e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
10259371c9d4SSatish Balay           x[i2] += xw[0];
10269371c9d4SSatish Balay           x[i2 + 1] += xw[1];
10279371c9d4SSatish Balay           x[i2 + 2] += xw[2];
10289371c9d4SSatish Balay           x[i2 + 3] += xw[3];
10299371c9d4SSatish Balay           x[i2 + 4] += xw[4];
10309371c9d4SSatish Balay           x[i2 + 5] += xw[5];
10319371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1032e48d15efSToby Isaac           idiag += 49;
1033e48d15efSToby Isaac           i2 += 7;
1034e48d15efSToby Isaac         }
1035e48d15efSToby Isaac         break;
1036e48d15efSToby Isaac       default:
1037e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1038e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1039e48d15efSToby Isaac           vi = aj + ai[i];
1040e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1041e48d15efSToby Isaac 
10429566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1043e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1044e48d15efSToby Isaac           workt = work;
1045e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
10469566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047e48d15efSToby Isaac             workt += bs;
1048e48d15efSToby Isaac           }
1049e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1051e48d15efSToby Isaac 
1052e48d15efSToby Isaac           idiag += bs2;
1053e48d15efSToby Isaac           i2 += bs;
1054e48d15efSToby Isaac         }
1055e48d15efSToby Isaac         break;
1056e48d15efSToby Isaac       }
10579566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058e48d15efSToby Isaac     }
1059e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
1061e48d15efSToby Isaac       i2    = bs * (m - 1);
1062e48d15efSToby Isaac       switch (bs) {
1063e48d15efSToby Isaac       case 1:
1064e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1065e48d15efSToby Isaac           v    = aa + ai[i];
1066e48d15efSToby Isaac           vi   = aj + ai[i];
1067e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
1068e48d15efSToby Isaac           s[0] = b[i2];
1069e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1070e48d15efSToby Isaac             xw[0] = x[vi[j]];
1071e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072e48d15efSToby Isaac           }
1073e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074e48d15efSToby Isaac           x[i2] += xw[0];
1075e48d15efSToby Isaac           idiag -= 1;
1076e48d15efSToby Isaac           i2 -= 1;
1077e48d15efSToby Isaac         }
1078e48d15efSToby Isaac         break;
1079e48d15efSToby Isaac       case 2:
1080e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1081e48d15efSToby Isaac           v    = aa + 4 * ai[i];
1082e48d15efSToby Isaac           vi   = aj + ai[i];
1083e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10849371c9d4SSatish Balay           s[0] = b[i2];
10859371c9d4SSatish Balay           s[1] = b[i2 + 1];
1086e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1087e48d15efSToby Isaac             idx   = 2 * vi[j];
1088e48d15efSToby Isaac             it    = 4 * j;
10899371c9d4SSatish Balay             xw[0] = x[idx];
10909371c9d4SSatish Balay             xw[1] = x[1 + idx];
1091e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092e48d15efSToby Isaac           }
1093e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
10949371c9d4SSatish Balay           x[i2] += xw[0];
10959371c9d4SSatish Balay           x[i2 + 1] += xw[1];
1096e48d15efSToby Isaac           idiag -= 4;
1097e48d15efSToby Isaac           i2 -= 2;
1098e48d15efSToby Isaac         }
1099e48d15efSToby Isaac         break;
1100e48d15efSToby Isaac       case 3:
1101e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1102e48d15efSToby Isaac           v    = aa + 9 * ai[i];
1103e48d15efSToby Isaac           vi   = aj + ai[i];
1104e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11059371c9d4SSatish Balay           s[0] = b[i2];
11069371c9d4SSatish Balay           s[1] = b[i2 + 1];
11079371c9d4SSatish Balay           s[2] = b[i2 + 2];
1108e48d15efSToby Isaac           while (nz--) {
1109e48d15efSToby Isaac             idx   = 3 * (*vi++);
11109371c9d4SSatish Balay             xw[0] = x[idx];
11119371c9d4SSatish Balay             xw[1] = x[1 + idx];
11129371c9d4SSatish Balay             xw[2] = x[2 + idx];
1113e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114e48d15efSToby Isaac             v += 9;
1115e48d15efSToby Isaac           }
1116e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
11179371c9d4SSatish Balay           x[i2] += xw[0];
11189371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11199371c9d4SSatish Balay           x[i2 + 2] += xw[2];
1120e48d15efSToby Isaac           idiag -= 9;
1121e48d15efSToby Isaac           i2 -= 3;
1122e48d15efSToby Isaac         }
1123e48d15efSToby Isaac         break;
1124e48d15efSToby Isaac       case 4:
1125e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1126e48d15efSToby Isaac           v    = aa + 16 * ai[i];
1127e48d15efSToby Isaac           vi   = aj + ai[i];
1128e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11299371c9d4SSatish Balay           s[0] = b[i2];
11309371c9d4SSatish Balay           s[1] = b[i2 + 1];
11319371c9d4SSatish Balay           s[2] = b[i2 + 2];
11329371c9d4SSatish Balay           s[3] = b[i2 + 3];
1133e48d15efSToby Isaac           while (nz--) {
1134e48d15efSToby Isaac             idx   = 4 * (*vi++);
11359371c9d4SSatish Balay             xw[0] = x[idx];
11369371c9d4SSatish Balay             xw[1] = x[1 + idx];
11379371c9d4SSatish Balay             xw[2] = x[2 + idx];
11389371c9d4SSatish Balay             xw[3] = x[3 + idx];
1139e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140e48d15efSToby Isaac             v += 16;
1141e48d15efSToby Isaac           }
1142e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
11439371c9d4SSatish Balay           x[i2] += xw[0];
11449371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11459371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11469371c9d4SSatish Balay           x[i2 + 3] += xw[3];
1147e48d15efSToby Isaac           idiag -= 16;
1148e48d15efSToby Isaac           i2 -= 4;
1149e48d15efSToby Isaac         }
1150e48d15efSToby Isaac         break;
1151e48d15efSToby Isaac       case 5:
1152e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1153e48d15efSToby Isaac           v    = aa + 25 * ai[i];
1154e48d15efSToby Isaac           vi   = aj + ai[i];
1155e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11569371c9d4SSatish Balay           s[0] = b[i2];
11579371c9d4SSatish Balay           s[1] = b[i2 + 1];
11589371c9d4SSatish Balay           s[2] = b[i2 + 2];
11599371c9d4SSatish Balay           s[3] = b[i2 + 3];
11609371c9d4SSatish Balay           s[4] = b[i2 + 4];
1161e48d15efSToby Isaac           while (nz--) {
1162e48d15efSToby Isaac             idx   = 5 * (*vi++);
11639371c9d4SSatish Balay             xw[0] = x[idx];
11649371c9d4SSatish Balay             xw[1] = x[1 + idx];
11659371c9d4SSatish Balay             xw[2] = x[2 + idx];
11669371c9d4SSatish Balay             xw[3] = x[3 + idx];
11679371c9d4SSatish Balay             xw[4] = x[4 + idx];
1168e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169e48d15efSToby Isaac             v += 25;
1170e48d15efSToby Isaac           }
1171e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
11729371c9d4SSatish Balay           x[i2] += xw[0];
11739371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11749371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11759371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11769371c9d4SSatish Balay           x[i2 + 4] += xw[4];
1177e48d15efSToby Isaac           idiag -= 25;
1178e48d15efSToby Isaac           i2 -= 5;
1179e48d15efSToby Isaac         }
1180e48d15efSToby Isaac         break;
1181e48d15efSToby Isaac       case 6:
1182e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1183e48d15efSToby Isaac           v    = aa + 36 * ai[i];
1184e48d15efSToby Isaac           vi   = aj + ai[i];
1185e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11869371c9d4SSatish Balay           s[0] = b[i2];
11879371c9d4SSatish Balay           s[1] = b[i2 + 1];
11889371c9d4SSatish Balay           s[2] = b[i2 + 2];
11899371c9d4SSatish Balay           s[3] = b[i2 + 3];
11909371c9d4SSatish Balay           s[4] = b[i2 + 4];
11919371c9d4SSatish Balay           s[5] = b[i2 + 5];
1192e48d15efSToby Isaac           while (nz--) {
1193e48d15efSToby Isaac             idx   = 6 * (*vi++);
11949371c9d4SSatish Balay             xw[0] = x[idx];
11959371c9d4SSatish Balay             xw[1] = x[1 + idx];
11969371c9d4SSatish Balay             xw[2] = x[2 + idx];
11979371c9d4SSatish Balay             xw[3] = x[3 + idx];
11989371c9d4SSatish Balay             xw[4] = x[4 + idx];
11999371c9d4SSatish Balay             xw[5] = x[5 + idx];
1200e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201e48d15efSToby Isaac             v += 36;
1202e48d15efSToby Isaac           }
1203e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
12049371c9d4SSatish Balay           x[i2] += xw[0];
12059371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12069371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12079371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12089371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12099371c9d4SSatish Balay           x[i2 + 5] += xw[5];
1210e48d15efSToby Isaac           idiag -= 36;
1211e48d15efSToby Isaac           i2 -= 6;
1212e48d15efSToby Isaac         }
1213e48d15efSToby Isaac         break;
1214e48d15efSToby Isaac       case 7:
1215e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1216e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1217e48d15efSToby Isaac           vi   = aj + ai[i];
1218e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
12199371c9d4SSatish Balay           s[0] = b[i2];
12209371c9d4SSatish Balay           s[1] = b[i2 + 1];
12219371c9d4SSatish Balay           s[2] = b[i2 + 2];
12229371c9d4SSatish Balay           s[3] = b[i2 + 3];
12239371c9d4SSatish Balay           s[4] = b[i2 + 4];
12249371c9d4SSatish Balay           s[5] = b[i2 + 5];
12259371c9d4SSatish Balay           s[6] = b[i2 + 6];
1226e48d15efSToby Isaac           while (nz--) {
1227e48d15efSToby Isaac             idx   = 7 * (*vi++);
12289371c9d4SSatish Balay             xw[0] = x[idx];
12299371c9d4SSatish Balay             xw[1] = x[1 + idx];
12309371c9d4SSatish Balay             xw[2] = x[2 + idx];
12319371c9d4SSatish Balay             xw[3] = x[3 + idx];
12329371c9d4SSatish Balay             xw[4] = x[4 + idx];
12339371c9d4SSatish Balay             xw[5] = x[5 + idx];
12349371c9d4SSatish Balay             xw[6] = x[6 + idx];
1235e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236e48d15efSToby Isaac             v += 49;
1237e48d15efSToby Isaac           }
1238e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
12399371c9d4SSatish Balay           x[i2] += xw[0];
12409371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12419371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12429371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12439371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12449371c9d4SSatish Balay           x[i2 + 5] += xw[5];
12459371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1246e48d15efSToby Isaac           idiag -= 49;
1247e48d15efSToby Isaac           i2 -= 7;
1248e48d15efSToby Isaac         }
1249e48d15efSToby Isaac         break;
1250e48d15efSToby Isaac       default:
1251e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1252e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1253e48d15efSToby Isaac           vi = aj + ai[i];
1254e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1255e48d15efSToby Isaac 
12569566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1257e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1258e48d15efSToby Isaac           workt = work;
1259e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
12609566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261e48d15efSToby Isaac             workt += bs;
1262e48d15efSToby Isaac           }
1263e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1265e48d15efSToby Isaac 
1266e48d15efSToby Isaac           idiag -= bs2;
1267e48d15efSToby Isaac           i2 -= bs;
1268e48d15efSToby Isaac         }
1269e48d15efSToby Isaac         break;
1270e48d15efSToby Isaac       }
12719566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272e48d15efSToby Isaac     }
1273e48d15efSToby Isaac   }
12749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1277de80f912SBarry Smith }
1278de80f912SBarry Smith 
1279af674e45SBarry Smith /*
128081824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281af674e45SBarry Smith */
1282af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283af674e45SBarry Smith   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285af674e45SBarry Smith   #define matsetvaluesblocked4_ matsetvaluesblocked4
1286af674e45SBarry Smith #endif
1287af674e45SBarry Smith 
1288d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289d71ae5a4SJacob Faibussowitsch {
1290af674e45SBarry Smith   Mat                A = *AA;
1291af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1292c1ac3661SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293c1ac3661SBarry Smith   PetscInt          *ai = a->i, *ailen = a->ilen;
129417ec6a02SBarry Smith   PetscInt          *aj = a->j, stepval, lastcol = -1;
1295f15d580aSBarry Smith   const PetscScalar *value = v;
12964bb09213Spetsc   MatScalar         *ap, *aa = a->a, *bap;
1297af674e45SBarry Smith 
1298af674e45SBarry Smith   PetscFunctionBegin;
1299ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300af674e45SBarry Smith   stepval = (n - 1) * 4;
1301af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
1302af674e45SBarry Smith     row  = im[k];
1303af674e45SBarry Smith     rp   = aj + ai[row];
1304af674e45SBarry Smith     ap   = aa + 16 * ai[row];
1305af674e45SBarry Smith     nrow = ailen[row];
1306af674e45SBarry Smith     low  = 0;
130717ec6a02SBarry Smith     high = nrow;
1308af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
1309af674e45SBarry Smith       col = in[l];
1310db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1311db4deed7SKarl Rupp       else high = nrow;
131217ec6a02SBarry Smith       lastcol = col;
13131e3347e8SBarry Smith       value   = v + k * (stepval + 4 + l) * 4;
1314af674e45SBarry Smith       while (high - low > 7) {
1315af674e45SBarry Smith         t = (low + high) / 2;
1316af674e45SBarry Smith         if (rp[t] > col) high = t;
1317af674e45SBarry Smith         else low = t;
1318af674e45SBarry Smith       }
1319af674e45SBarry Smith       for (i = low; i < high; i++) {
1320af674e45SBarry Smith         if (rp[i] > col) break;
1321af674e45SBarry Smith         if (rp[i] == col) {
1322af674e45SBarry Smith           bap = ap + 16 * i;
1323af674e45SBarry Smith           for (ii = 0; ii < 4; ii++, value += stepval) {
1324ad540459SPierre Jolivet             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325af674e45SBarry Smith           }
1326af674e45SBarry Smith           goto noinsert2;
1327af674e45SBarry Smith         }
1328af674e45SBarry Smith       }
1329af674e45SBarry Smith       N = nrow++ - 1;
133017ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
1331af674e45SBarry Smith       /* shift up all the later entries in this row */
1332af674e45SBarry Smith       for (ii = N; ii >= i; ii--) {
1333af674e45SBarry Smith         rp[ii + 1] = rp[ii];
13349566063dSJacob Faibussowitsch         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335af674e45SBarry Smith       }
133648a46eb9SPierre Jolivet       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337af674e45SBarry Smith       rp[i] = col;
1338af674e45SBarry Smith       bap   = ap + 16 * i;
1339af674e45SBarry Smith       for (ii = 0; ii < 4; ii++, value += stepval) {
1340ad540459SPierre Jolivet         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341af674e45SBarry Smith       }
1342af674e45SBarry Smith     noinsert2:;
1343af674e45SBarry Smith       low = i;
1344af674e45SBarry Smith     }
1345af674e45SBarry Smith     ailen[row] = nrow;
1346af674e45SBarry Smith   }
1347be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1348af674e45SBarry Smith }
1349af674e45SBarry Smith 
1350af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351af674e45SBarry Smith   #define matsetvalues4_ MATSETVALUES4
1352af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353af674e45SBarry Smith   #define matsetvalues4_ matsetvalues4
1354af674e45SBarry Smith #endif
1355af674e45SBarry Smith 
1356d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357d71ae5a4SJacob Faibussowitsch {
1358af674e45SBarry Smith   Mat          A = *AA;
1359af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360580bdb30SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
1362c1ac3661SBarry Smith   PetscInt    *aj = a->j, brow, bcol;
136317ec6a02SBarry Smith   PetscInt     ridx, cidx, lastcol = -1;
1364af674e45SBarry Smith   MatScalar   *ap, value, *aa      = a->a, *bap;
1365af674e45SBarry Smith 
1366af674e45SBarry Smith   PetscFunctionBegin;
1367af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
13689371c9d4SSatish Balay     row  = im[k];
13699371c9d4SSatish Balay     brow = row / 4;
1370af674e45SBarry Smith     rp   = aj + ai[brow];
1371af674e45SBarry Smith     ap   = aa + 16 * ai[brow];
1372af674e45SBarry Smith     nrow = ailen[brow];
1373af674e45SBarry Smith     low  = 0;
137417ec6a02SBarry Smith     high = nrow;
1375af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
13769371c9d4SSatish Balay       col   = in[l];
13779371c9d4SSatish Balay       bcol  = col / 4;
13789371c9d4SSatish Balay       ridx  = row % 4;
13799371c9d4SSatish Balay       cidx  = col % 4;
1380af674e45SBarry Smith       value = v[l + k * n];
1381db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1382db4deed7SKarl Rupp       else high = nrow;
138317ec6a02SBarry Smith       lastcol = col;
1384af674e45SBarry Smith       while (high - low > 7) {
1385af674e45SBarry Smith         t = (low + high) / 2;
1386af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1387af674e45SBarry Smith         else low = t;
1388af674e45SBarry Smith       }
1389af674e45SBarry Smith       for (i = low; i < high; i++) {
1390af674e45SBarry Smith         if (rp[i] > bcol) break;
1391af674e45SBarry Smith         if (rp[i] == bcol) {
1392af674e45SBarry Smith           bap = ap + 16 * i + 4 * cidx + ridx;
1393af674e45SBarry Smith           *bap += value;
1394af674e45SBarry Smith           goto noinsert1;
1395af674e45SBarry Smith         }
1396af674e45SBarry Smith       }
1397af674e45SBarry Smith       N = nrow++ - 1;
139817ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1399af674e45SBarry Smith       /* shift up all the later entries in this row */
14009566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
14019566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
14029566063dSJacob Faibussowitsch       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403af674e45SBarry Smith       rp[i]                        = bcol;
1404af674e45SBarry Smith       ap[16 * i + 4 * cidx + ridx] = value;
1405af674e45SBarry Smith     noinsert1:;
1406af674e45SBarry Smith       low = i;
1407af674e45SBarry Smith     }
1408af674e45SBarry Smith     ailen[brow] = nrow;
1409af674e45SBarry Smith   }
1410be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1411af674e45SBarry Smith }
1412af674e45SBarry Smith 
1413d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1414d71ae5a4SJacob Faibussowitsch {
14153b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14161a83f524SJed Brown   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
14171a83f524SJed Brown   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
14183b2fbd54SBarry Smith 
14193a40ed3dSBarry Smith   PetscFunctionBegin;
14203b2fbd54SBarry Smith   *nn = n;
14213ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
14223b2fbd54SBarry Smith   if (symmetric) {
14239566063dSJacob Faibussowitsch     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1424553b3c51SBarry Smith     nz = tia[n];
14253b2fbd54SBarry Smith   } else {
14269371c9d4SSatish Balay     tia = a->i;
14279371c9d4SSatish Balay     tja = a->j;
14283b2fbd54SBarry Smith   }
14293b2fbd54SBarry Smith 
1430ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1431ecc77c7aSBarry Smith     (*nn) *= bs;
14328f7157efSSatish Balay     /* malloc & create the natural set of indices */
14339566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((n + 1) * bs, ia));
14349985e31cSBarry Smith     if (n) {
14352462f5fdSStefano Zampini       (*ia)[0] = oshift;
1436ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
14379985e31cSBarry Smith     }
1438ecc77c7aSBarry Smith 
1439ecc77c7aSBarry Smith     for (i = 1; i < n; i++) {
1440ecc77c7aSBarry Smith       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1441ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
14428f7157efSSatish Balay     }
1443ad540459SPierre Jolivet     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1444ecc77c7aSBarry Smith 
14451a83f524SJed Brown     if (inja) {
14469566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz * bs * bs, ja));
14479985e31cSBarry Smith       cnt = 0;
14489985e31cSBarry Smith       for (i = 0; i < n; i++) {
14499985e31cSBarry Smith         for (j = 0; j < bs; j++) {
14509985e31cSBarry Smith           for (k = tia[i]; k < tia[i + 1]; k++) {
1451ad540459SPierre Jolivet             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
14529985e31cSBarry Smith           }
14539985e31cSBarry Smith         }
14549985e31cSBarry Smith       }
14559985e31cSBarry Smith     }
14569985e31cSBarry Smith 
14578f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
14589566063dSJacob Faibussowitsch       PetscCall(PetscFree(tia));
14599566063dSJacob Faibussowitsch       PetscCall(PetscFree(tja));
14608f7157efSSatish Balay     }
1461f6d58c54SBarry Smith   } else if (oshift == 1) {
1462715a17b5SBarry Smith     if (symmetric) {
1463a2ea699eSBarry Smith       nz = tia[A->rmap->n / bs];
1464715a17b5SBarry Smith       /*  add 1 to i and j indices */
1465715a17b5SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1466715a17b5SBarry Smith       *ia = tia;
1467715a17b5SBarry Smith       if (ja) {
1468715a17b5SBarry Smith         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1469715a17b5SBarry Smith         *ja = tja;
1470715a17b5SBarry Smith       }
1471715a17b5SBarry Smith     } else {
1472a2ea699eSBarry Smith       nz = a->i[A->rmap->n / bs];
1473f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
14749566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1475f6d58c54SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1476f6d58c54SBarry Smith       if (ja) {
14779566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(nz, ja));
1478f6d58c54SBarry Smith         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1479f6d58c54SBarry Smith       }
1480715a17b5SBarry Smith     }
14818f7157efSSatish Balay   } else {
14828f7157efSSatish Balay     *ia = tia;
1483ecc77c7aSBarry Smith     if (ja) *ja = tja;
14848f7157efSSatish Balay   }
14853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14863b2fbd54SBarry Smith }
14873b2fbd54SBarry Smith 
1488d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1489d71ae5a4SJacob Faibussowitsch {
14903a40ed3dSBarry Smith   PetscFunctionBegin;
14913ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1492715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
14939566063dSJacob Faibussowitsch     PetscCall(PetscFree(*ia));
14949566063dSJacob Faibussowitsch     if (ja) PetscCall(PetscFree(*ja));
14953b2fbd54SBarry Smith   }
14963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14973b2fbd54SBarry Smith }
14983b2fbd54SBarry Smith 
1499d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1500d71ae5a4SJacob Faibussowitsch {
15012d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15022d61bbb3SSatish Balay 
1503433994e6SBarry Smith   PetscFunctionBegin;
1504b4e2f619SBarry Smith   if (A->hash_active) {
1505b4e2f619SBarry Smith     PetscInt bs;
1506e3c72094SPierre Jolivet     A->ops[0] = a->cops;
1507b4e2f619SBarry Smith     PetscCall(PetscHMapIJVDestroy(&a->ht));
1508b4e2f619SBarry Smith     PetscCall(MatGetBlockSize(A, &bs));
1509b4e2f619SBarry Smith     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1510b4e2f619SBarry Smith     PetscCall(PetscFree(a->dnz));
1511b4e2f619SBarry Smith     PetscCall(PetscFree(a->bdnz));
1512b4e2f619SBarry Smith     A->hash_active = PETSC_FALSE;
1513b4e2f619SBarry Smith   }
15143ba16761SJacob Faibussowitsch   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
15159566063dSJacob Faibussowitsch   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
15169566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
15179566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
1518*421480d9SBarry Smith   PetscCall(PetscFree(a->diag));
15199566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->idiag));
15209566063dSJacob Faibussowitsch   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
15219566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->solve_work));
15229566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->mult_work));
15239566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_workt));
15249566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_work));
15259566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
15269566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->saved_values));
15279566063dSJacob Faibussowitsch   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1528c4319e64SHong Zhang 
15299566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->sbaijMat));
15309566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->parent));
15319566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1532901853e0SKris Buschelman 
15339566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
15349566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
15359566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
15369566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
15379566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
15389566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
15399566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
15409566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
15419566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
15429566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
15439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
15449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
15457ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
15469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
15477ea3e4caSstefano_zampini #endif
15489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
15492e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
15503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15512d61bbb3SSatish Balay }
15522d61bbb3SSatish Balay 
155366976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1554d71ae5a4SJacob Faibussowitsch {
15552d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15562d61bbb3SSatish Balay 
15572d61bbb3SSatish Balay   PetscFunctionBegin;
1558aa275fccSKris Buschelman   switch (op) {
1559d71ae5a4SJacob Faibussowitsch   case MAT_ROW_ORIENTED:
1560d71ae5a4SJacob Faibussowitsch     a->roworiented = flg;
1561d71ae5a4SJacob Faibussowitsch     break;
1562d71ae5a4SJacob Faibussowitsch   case MAT_KEEP_NONZERO_PATTERN:
1563d71ae5a4SJacob Faibussowitsch     a->keepnonzeropattern = flg;
1564d71ae5a4SJacob Faibussowitsch     break;
1565d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATIONS:
1566d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? 0 : 1);
1567d71ae5a4SJacob Faibussowitsch     break;
1568d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATION_ERR:
1569d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -1 : 0);
1570d71ae5a4SJacob Faibussowitsch     break;
1571d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1572d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -2 : 0);
1573d71ae5a4SJacob Faibussowitsch     break;
1574d71ae5a4SJacob Faibussowitsch   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1575d71ae5a4SJacob Faibussowitsch     a->nounused = (flg ? -1 : 0);
1576d71ae5a4SJacob Faibussowitsch     break;
1577d71ae5a4SJacob Faibussowitsch   default:
1578888c827cSStefano Zampini     break;
15792d61bbb3SSatish Balay   }
15803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15812d61bbb3SSatish Balay }
15822d61bbb3SSatish Balay 
158352768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
1584d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1585d71ae5a4SJacob Faibussowitsch {
158652768537SHong Zhang   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
158752768537SHong Zhang   MatScalar   *aa_i;
158887828ca2SBarry Smith   PetscScalar *v_i;
15892d61bbb3SSatish Balay 
15902d61bbb3SSatish Balay   PetscFunctionBegin;
1591d0f46423SBarry Smith   bs  = A->rmap->bs;
159252768537SHong Zhang   bs2 = bs * bs;
15935f80ce2aSJacob Faibussowitsch   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
15942d61bbb3SSatish Balay 
15952d61bbb3SSatish Balay   bn  = row / bs; /* Block number */
15962d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
15972d61bbb3SSatish Balay   M   = ai[bn + 1] - ai[bn];
15982d61bbb3SSatish Balay   *nz = bs * M;
15992d61bbb3SSatish Balay 
16002d61bbb3SSatish Balay   if (v) {
1601f4259b30SLisandro Dalcin     *v = NULL;
16022d61bbb3SSatish Balay     if (*nz) {
16039566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, v));
16042d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16052d61bbb3SSatish Balay         v_i  = *v + i * bs;
16062d61bbb3SSatish Balay         aa_i = aa + bs2 * (ai[bn] + i);
160726fbe8dcSKarl Rupp         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
16082d61bbb3SSatish Balay       }
16092d61bbb3SSatish Balay     }
16102d61bbb3SSatish Balay   }
16112d61bbb3SSatish Balay 
16122d61bbb3SSatish Balay   if (idx) {
1613f4259b30SLisandro Dalcin     *idx = NULL;
16142d61bbb3SSatish Balay     if (*nz) {
16159566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, idx));
16162d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16172d61bbb3SSatish Balay         idx_i = *idx + i * bs;
16182d61bbb3SSatish Balay         itmp  = bs * aj[ai[bn] + i];
161926fbe8dcSKarl Rupp         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
16202d61bbb3SSatish Balay       }
16212d61bbb3SSatish Balay     }
16222d61bbb3SSatish Balay   }
16233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16242d61bbb3SSatish Balay }
16252d61bbb3SSatish Balay 
1626d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1627d71ae5a4SJacob Faibussowitsch {
162852768537SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
162952768537SHong Zhang 
163052768537SHong Zhang   PetscFunctionBegin;
16319566063dSJacob Faibussowitsch   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
16323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
163352768537SHong Zhang }
163452768537SHong Zhang 
1635d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1636d71ae5a4SJacob Faibussowitsch {
16372d61bbb3SSatish Balay   PetscFunctionBegin;
16389566063dSJacob Faibussowitsch   if (idx) PetscCall(PetscFree(*idx));
16399566063dSJacob Faibussowitsch   if (v) PetscCall(PetscFree(*v));
16403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16412d61bbb3SSatish Balay }
16422d61bbb3SSatish Balay 
164366976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1644d71ae5a4SJacob Faibussowitsch {
164520e84f26SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
16462d61bbb3SSatish Balay   Mat          C;
164720e84f26SHong Zhang   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
164820e84f26SHong Zhang   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
164920e84f26SHong Zhang   MatScalar   *ata, *aa = a->a;
16502d61bbb3SSatish Balay 
16512d61bbb3SSatish Balay   PetscFunctionBegin;
16527fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
16539566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1654cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
165520e84f26SHong Zhang     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
16562d61bbb3SSatish Balay 
16579566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
16589566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
16599566063dSJacob Faibussowitsch     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
16609566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
166120e84f26SHong Zhang 
166220e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
166320e84f26SHong Zhang     ati = at->i;
166420e84f26SHong Zhang     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1665fc4dec0aSBarry Smith   } else {
1666fc4dec0aSBarry Smith     C   = *B;
166720e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
166820e84f26SHong Zhang     ati = at->i;
1669fc4dec0aSBarry Smith   }
1670fc4dec0aSBarry Smith 
167120e84f26SHong Zhang   atj = at->j;
167220e84f26SHong Zhang   ata = at->a;
167320e84f26SHong Zhang 
167420e84f26SHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
16759566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(atfill, ati, nbs));
167620e84f26SHong Zhang 
167720e84f26SHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
16782d61bbb3SSatish Balay   for (i = 0; i < mbs; i++) {
167920e84f26SHong Zhang     anzj = ai[i + 1] - ai[i];
168020e84f26SHong Zhang     for (j = 0; j < anzj; j++) {
168120e84f26SHong Zhang       atj[atfill[*aj]] = i;
168220e84f26SHong Zhang       for (kr = 0; kr < bs; kr++) {
1683ad540459SPierre Jolivet         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
16842d61bbb3SSatish Balay       }
168520e84f26SHong Zhang       atfill[*aj++] += 1;
168620e84f26SHong Zhang     }
168720e84f26SHong Zhang   }
16889566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
16899566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
16902d61bbb3SSatish Balay 
169120e84f26SHong Zhang   /* Clean up temporary space and complete requests. */
16929566063dSJacob Faibussowitsch   PetscCall(PetscFree(atfill));
169320e84f26SHong Zhang 
1694cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
169558b7e2c1SStefano Zampini     PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
16962d61bbb3SSatish Balay     *B = C;
16972d61bbb3SSatish Balay   } else {
16989566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &C));
16992d61bbb3SSatish Balay   }
17003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17012d61bbb3SSatish Balay }
17022d61bbb3SSatish Balay 
1703ff6a9541SJacob Faibussowitsch static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1704d71ae5a4SJacob Faibussowitsch {
1705453d3561SHong Zhang   Mat Btrans;
1706453d3561SHong Zhang 
1707453d3561SHong Zhang   PetscFunctionBegin;
1708453d3561SHong Zhang   *f = PETSC_FALSE;
1709acd337a6SBarry Smith   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
17109566063dSJacob Faibussowitsch   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
17119566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Btrans));
17123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1713453d3561SHong Zhang }
1714453d3561SHong Zhang 
1715618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1716d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1717d71ae5a4SJacob Faibussowitsch {
1718b51a4376SLisandro Dalcin   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1719b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1720b51a4376SLisandro Dalcin   PetscInt    *rowlens, *colidxs;
1721b51a4376SLisandro Dalcin   PetscScalar *matvals;
17222593348eSBarry Smith 
17233a40ed3dSBarry Smith   PetscFunctionBegin;
17249566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
17253b2fbd54SBarry Smith 
1726b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1727b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1728b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1729b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1730b51a4376SLisandro Dalcin   nz = bs * bs * A->nz;
17312593348eSBarry Smith 
1732b51a4376SLisandro Dalcin   /* write matrix header */
1733b51a4376SLisandro Dalcin   header[0] = MAT_FILE_CLASSID;
17349371c9d4SSatish Balay   header[1] = M;
17359371c9d4SSatish Balay   header[2] = N;
17369371c9d4SSatish Balay   header[3] = nz;
17379566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
17382593348eSBarry Smith 
1739b51a4376SLisandro Dalcin   /* store row lengths */
17409566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rowlens));
1741b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
17429371c9d4SSatish Balay     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
17439566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
17449566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
1745b51a4376SLisandro Dalcin 
1746b51a4376SLisandro Dalcin   /* store column indices  */
17479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &colidxs));
1748b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1749b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1750b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
17519371c9d4SSatish Balay         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
17525f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
17539566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
17549566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
17552593348eSBarry Smith 
17562593348eSBarry Smith   /* store nonzero values */
17579566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &matvals));
1758b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1759b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1760b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
17619371c9d4SSatish Balay         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
17625f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
17639566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
17649566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
1765ce6f0cecSBarry Smith 
1766b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
17679566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
17683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17692593348eSBarry Smith }
17702593348eSBarry Smith 
1771d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1772d71ae5a4SJacob Faibussowitsch {
17737dc0baabSHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
17747dc0baabSHong Zhang   PetscInt     i, bs = A->rmap->bs, k;
17757dc0baabSHong Zhang 
17767dc0baabSHong Zhang   PetscFunctionBegin;
17779566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
17787dc0baabSHong Zhang   for (i = 0; i < a->mbs; i++) {
17799566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
178048a46eb9SPierre Jolivet     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
17819566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
17827dc0baabSHong Zhang   }
17839566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
17843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17857dc0baabSHong Zhang }
17867dc0baabSHong Zhang 
1787d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1788d71ae5a4SJacob Faibussowitsch {
1789b6490206SBarry Smith   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1790d0f46423SBarry Smith   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1791f3ef73ceSBarry Smith   PetscViewerFormat format;
17922593348eSBarry Smith 
17933a40ed3dSBarry Smith   PetscFunctionBegin;
17947dc0baabSHong Zhang   if (A->structure_only) {
17959566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
17963ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
17977dc0baabSHong Zhang   }
17987dc0baabSHong Zhang 
17999566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
1800456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
18019566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1802fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1803ade3a672SBarry Smith     const char *matname;
1804bcd9e38bSBarry Smith     Mat         aij;
18059566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
18069566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
18079566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
18089566063dSJacob Faibussowitsch     PetscCall(MatView(aij, viewer));
18099566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&aij));
181004929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
18113ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1812fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
18139566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
181444cd7ae7SLois Curfman McInnes     for (i = 0; i < a->mbs; i++) {
181544cd7ae7SLois Curfman McInnes       for (j = 0; j < bs; j++) {
18169566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
181744cd7ae7SLois Curfman McInnes         for (k = a->i[i]; k < a->i[i + 1]; k++) {
181844cd7ae7SLois Curfman McInnes           for (l = 0; l < bs; l++) {
1819aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18200e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18219371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18220e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18239371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18240e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18259566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
18260ef38995SBarry Smith             }
182744cd7ae7SLois Curfman McInnes #else
182848a46eb9SPierre Jolivet             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
182944cd7ae7SLois Curfman McInnes #endif
183044cd7ae7SLois Curfman McInnes           }
183144cd7ae7SLois Curfman McInnes         }
18329566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
183344cd7ae7SLois Curfman McInnes       }
183444cd7ae7SLois Curfman McInnes     }
18359566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18360ef38995SBarry Smith   } else {
18379566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1838b6490206SBarry Smith     for (i = 0; i < a->mbs; i++) {
1839b6490206SBarry Smith       for (j = 0; j < bs; j++) {
18409566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1841b6490206SBarry Smith         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1842b6490206SBarry Smith           for (l = 0; l < bs; l++) {
1843aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18440e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
18459371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18460e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
18479371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18480ef38995SBarry Smith             } else {
18499566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
185088685aaeSLois Curfman McInnes             }
185188685aaeSLois Curfman McInnes #else
18529566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
185388685aaeSLois Curfman McInnes #endif
18542593348eSBarry Smith           }
18552593348eSBarry Smith         }
18569566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18572593348eSBarry Smith       }
18582593348eSBarry Smith     }
18599566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1860b6490206SBarry Smith   }
18619566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
18623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18632593348eSBarry Smith }
18642593348eSBarry Smith 
18659804daf3SBarry Smith #include <petscdraw.h>
1866d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1867d71ae5a4SJacob Faibussowitsch {
186877ed5343SBarry Smith   Mat               A = (Mat)Aa;
18693270192aSSatish Balay   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
18706497c311SBarry Smith   PetscInt          row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
18710e6d2581SBarry Smith   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
18723f1db9ecSBarry Smith   MatScalar        *aa;
1873b0a32e0cSBarry Smith   PetscViewer       viewer;
1874b3e7f47fSJed Brown   PetscViewerFormat format;
18756497c311SBarry Smith   int               color;
18763270192aSSatish Balay 
18773a40ed3dSBarry Smith   PetscFunctionBegin;
18789566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
18799566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
18809566063dSJacob Faibussowitsch   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
188177ed5343SBarry Smith 
18823270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1883b3e7f47fSJed Brown 
1884b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1885d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1886383922c3SLisandro Dalcin     /* Blue for negative, Cyan for zero and  Red for positive */
1887b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
18883270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
18893270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
18909371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
18919371c9d4SSatish Balay         y_r = y_l + 1.0;
18929371c9d4SSatish Balay         x_l = a->j[j] * bs;
18939371c9d4SSatish Balay         x_r = x_l + 1.0;
18943270192aSSatish Balay         aa  = a->a + j * bs2;
18953270192aSSatish Balay         for (k = 0; k < bs; k++) {
18963270192aSSatish Balay           for (l = 0; l < bs; l++) {
18970e6d2581SBarry Smith             if (PetscRealPart(*aa++) >= 0.) continue;
18989566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
18993270192aSSatish Balay           }
19003270192aSSatish Balay         }
19013270192aSSatish Balay       }
19023270192aSSatish Balay     }
1903b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
19043270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19053270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19069371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19079371c9d4SSatish Balay         y_r = y_l + 1.0;
19089371c9d4SSatish Balay         x_l = a->j[j] * bs;
19099371c9d4SSatish Balay         x_r = x_l + 1.0;
19103270192aSSatish Balay         aa  = a->a + j * bs2;
19113270192aSSatish Balay         for (k = 0; k < bs; k++) {
19123270192aSSatish Balay           for (l = 0; l < bs; l++) {
19130e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
19149566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19153270192aSSatish Balay           }
19163270192aSSatish Balay         }
19173270192aSSatish Balay       }
19183270192aSSatish Balay     }
1919b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
19203270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19213270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19229371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19239371c9d4SSatish Balay         y_r = y_l + 1.0;
19249371c9d4SSatish Balay         x_l = a->j[j] * bs;
19259371c9d4SSatish Balay         x_r = x_l + 1.0;
19263270192aSSatish Balay         aa  = a->a + j * bs2;
19273270192aSSatish Balay         for (k = 0; k < bs; k++) {
19283270192aSSatish Balay           for (l = 0; l < bs; l++) {
19290e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
19309566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19313270192aSSatish Balay           }
19323270192aSSatish Balay         }
19333270192aSSatish Balay       }
19343270192aSSatish Balay     }
1935d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
1936b3e7f47fSJed Brown   } else {
1937b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
1938b3e7f47fSJed Brown     /* first determine max of all nonzero values */
1939b05fc000SLisandro Dalcin     PetscReal minv = 0.0, maxv = 0.0;
1940b3e7f47fSJed Brown     PetscDraw popup;
1941b3e7f47fSJed Brown 
1942b3e7f47fSJed Brown     for (i = 0; i < a->nz * a->bs2; i++) {
1943b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1944b3e7f47fSJed Brown     }
1945383922c3SLisandro Dalcin     if (minv >= maxv) maxv = minv + PETSC_SMALL;
19469566063dSJacob Faibussowitsch     PetscCall(PetscDrawGetPopup(draw, &popup));
19479566063dSJacob Faibussowitsch     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1948383922c3SLisandro Dalcin 
1949d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1950b3e7f47fSJed Brown     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1951b3e7f47fSJed Brown       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19529371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19539371c9d4SSatish Balay         y_r = y_l + 1.0;
19549371c9d4SSatish Balay         x_l = a->j[j] * bs;
19559371c9d4SSatish Balay         x_r = x_l + 1.0;
1956b3e7f47fSJed Brown         aa  = a->a + j * bs2;
1957b3e7f47fSJed Brown         for (k = 0; k < bs; k++) {
1958b3e7f47fSJed Brown           for (l = 0; l < bs; l++) {
1959383922c3SLisandro Dalcin             MatScalar v = *aa++;
1960383922c3SLisandro Dalcin             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
19619566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1962b3e7f47fSJed Brown           }
1963b3e7f47fSJed Brown         }
1964b3e7f47fSJed Brown       }
1965b3e7f47fSJed Brown     }
1966d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
1967b3e7f47fSJed Brown   }
19683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
196977ed5343SBarry Smith }
19703270192aSSatish Balay 
1971d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
1972d71ae5a4SJacob Faibussowitsch {
19730e6d2581SBarry Smith   PetscReal xl, yl, xr, yr, w, h;
1974b0a32e0cSBarry Smith   PetscDraw draw;
1975ace3abfcSBarry Smith   PetscBool isnull;
19763270192aSSatish Balay 
197777ed5343SBarry Smith   PetscFunctionBegin;
19789566063dSJacob Faibussowitsch   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
19799566063dSJacob Faibussowitsch   PetscCall(PetscDrawIsNull(draw, &isnull));
19803ba16761SJacob Faibussowitsch   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
198177ed5343SBarry Smith 
19829371c9d4SSatish Balay   xr = A->cmap->n;
19839371c9d4SSatish Balay   yr = A->rmap->N;
19849371c9d4SSatish Balay   h  = yr / 10.0;
19859371c9d4SSatish Balay   w  = xr / 10.0;
19869371c9d4SSatish Balay   xr += w;
19879371c9d4SSatish Balay   yr += h;
19889371c9d4SSatish Balay   xl = -w;
19899371c9d4SSatish Balay   yl = -h;
19909566063dSJacob Faibussowitsch   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
19919566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
19929566063dSJacob Faibussowitsch   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
19939566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
19949566063dSJacob Faibussowitsch   PetscCall(PetscDrawSave(draw));
19953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19963270192aSSatish Balay }
19973270192aSSatish Balay 
1998d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
1999d71ae5a4SJacob Faibussowitsch {
20009f196a02SMartin Diehl   PetscBool isascii, isbinary, isdraw;
20012593348eSBarry Smith 
20023a40ed3dSBarry Smith   PetscFunctionBegin;
20039f196a02SMartin Diehl   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
20049566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
20059566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
20069f196a02SMartin Diehl   if (isascii) {
20079566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
20080f5bd95cSBarry Smith   } else if (isbinary) {
20099566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
20100f5bd95cSBarry Smith   } else if (isdraw) {
20119566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
20125cd90555SBarry Smith   } else {
2013a5e6ed63SBarry Smith     Mat B;
20149566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
20159566063dSJacob Faibussowitsch     PetscCall(MatView(B, viewer));
20169566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
20172593348eSBarry Smith   }
20183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20192593348eSBarry Smith }
2020b6490206SBarry Smith 
2021d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2022d71ae5a4SJacob Faibussowitsch {
2023cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2024c1ac3661SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2025c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
2026d0f46423SBarry Smith   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
202797e567efSBarry Smith   MatScalar   *ap, *aa = a->a;
2028cd0e1443SSatish Balay 
20293a40ed3dSBarry Smith   PetscFunctionBegin;
20302d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over rows */
20319371c9d4SSatish Balay     row  = im[k];
20329371c9d4SSatish Balay     brow = row / bs;
20339371c9d4SSatish Balay     if (row < 0) {
20349371c9d4SSatish Balay       v += n;
20359371c9d4SSatish Balay       continue;
20369371c9d4SSatish Balay     } /* negative row */
203754c59aa7SJacob Faibussowitsch     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
20388e3a54c0SPierre Jolivet     rp   = PetscSafePointerPlusOffset(aj, ai[brow]);
20398e3a54c0SPierre Jolivet     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
20402c3acbe9SBarry Smith     nrow = ailen[brow];
20412d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over columns */
20429371c9d4SSatish Balay       if (in[l] < 0) {
20439371c9d4SSatish Balay         v++;
20449371c9d4SSatish Balay         continue;
20459371c9d4SSatish Balay       } /* negative column */
204654c59aa7SJacob Faibussowitsch       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
20472d61bbb3SSatish Balay       col  = in[l];
20482d61bbb3SSatish Balay       bcol = col / bs;
20492d61bbb3SSatish Balay       cidx = col % bs;
20502d61bbb3SSatish Balay       ridx = row % bs;
20512d61bbb3SSatish Balay       high = nrow;
20522d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
20532d61bbb3SSatish Balay       while (high - low > 5) {
2054cd0e1443SSatish Balay         t = (low + high) / 2;
2055cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
2056cd0e1443SSatish Balay         else low = t;
2057cd0e1443SSatish Balay       }
2058cd0e1443SSatish Balay       for (i = low; i < high; i++) {
2059cd0e1443SSatish Balay         if (rp[i] > bcol) break;
2060cd0e1443SSatish Balay         if (rp[i] == bcol) {
20612d61bbb3SSatish Balay           *v++ = ap[bs2 * i + bs * cidx + ridx];
20622d61bbb3SSatish Balay           goto finished;
2063cd0e1443SSatish Balay         }
2064cd0e1443SSatish Balay       }
206597e567efSBarry Smith       *v++ = 0.0;
20662d61bbb3SSatish Balay     finished:;
2067cd0e1443SSatish Balay     }
2068cd0e1443SSatish Balay   }
20693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2070cd0e1443SSatish Balay }
2071cd0e1443SSatish Balay 
2072d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2073d71ae5a4SJacob Faibussowitsch {
207492c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2075e2ee6c50SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2076c1ac3661SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2077d0f46423SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2078ace3abfcSBarry Smith   PetscBool          roworiented = a->roworiented;
2079dd6ea824SBarry Smith   const PetscScalar *value       = v;
20809d243f67SHong Zhang   MatScalar         *ap = NULL, *aa = a->a, *bap;
208192c4ed94SBarry Smith 
20823a40ed3dSBarry Smith   PetscFunctionBegin;
20830e324ae4SSatish Balay   if (roworiented) {
20840e324ae4SSatish Balay     stepval = (n - 1) * bs;
20850e324ae4SSatish Balay   } else {
20860e324ae4SSatish Balay     stepval = (m - 1) * bs;
20870e324ae4SSatish Balay   }
208892c4ed94SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
208992c4ed94SBarry Smith     row = im[k];
20905ef9f2a5SBarry Smith     if (row < 0) continue;
20916bdcaf15SBarry Smith     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
209292c4ed94SBarry Smith     rp = aj + ai[row];
20937dc0baabSHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[row];
209492c4ed94SBarry Smith     rmax = imax[row];
209592c4ed94SBarry Smith     nrow = ailen[row];
209692c4ed94SBarry Smith     low  = 0;
2097c71e6ed7SBarry Smith     high = nrow;
209892c4ed94SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
20995ef9f2a5SBarry Smith       if (in[l] < 0) continue;
21006bdcaf15SBarry Smith       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
210192c4ed94SBarry Smith       col = in[l];
21027dc0baabSHong Zhang       if (!A->structure_only) {
210392c4ed94SBarry Smith         if (roworiented) {
210453ef36baSBarry Smith           value = v + (k * (stepval + bs) + l) * bs;
21050e324ae4SSatish Balay         } else {
210653ef36baSBarry Smith           value = v + (l * (stepval + bs) + k) * bs;
210792c4ed94SBarry Smith         }
21087dc0baabSHong Zhang       }
210926fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
211026fbe8dcSKarl Rupp       else high = nrow;
2111e2ee6c50SBarry Smith       lastcol = col;
211292c4ed94SBarry Smith       while (high - low > 7) {
211392c4ed94SBarry Smith         t = (low + high) / 2;
211492c4ed94SBarry Smith         if (rp[t] > col) high = t;
211592c4ed94SBarry Smith         else low = t;
211692c4ed94SBarry Smith       }
211792c4ed94SBarry Smith       for (i = low; i < high; i++) {
211892c4ed94SBarry Smith         if (rp[i] > col) break;
211992c4ed94SBarry Smith         if (rp[i] == col) {
21207dc0baabSHong Zhang           if (A->structure_only) goto noinsert2;
21218a84c255SSatish Balay           bap = ap + bs2 * i;
21220e324ae4SSatish Balay           if (roworiented) {
21238a84c255SSatish Balay             if (is == ADD_VALUES) {
2124dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2125ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2126dd9472c6SBarry Smith               }
21270e324ae4SSatish Balay             } else {
2128dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2129ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2130dd9472c6SBarry Smith               }
2131dd9472c6SBarry Smith             }
21320e324ae4SSatish Balay           } else {
21330e324ae4SSatish Balay             if (is == ADD_VALUES) {
213453ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2135ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
213653ef36baSBarry Smith                 bap += bs;
2137dd9472c6SBarry Smith               }
21380e324ae4SSatish Balay             } else {
213953ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2140ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
214153ef36baSBarry Smith                 bap += bs;
21428a84c255SSatish Balay               }
2143dd9472c6SBarry Smith             }
2144dd9472c6SBarry Smith           }
2145f1241b54SBarry Smith           goto noinsert2;
214692c4ed94SBarry Smith         }
214792c4ed94SBarry Smith       }
214889280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
21495f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
21507dc0baabSHong Zhang       if (A->structure_only) {
21517dc0baabSHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
21527dc0baabSHong Zhang       } else {
2153fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
21547dc0baabSHong Zhang       }
21559371c9d4SSatish Balay       N = nrow++ - 1;
21569371c9d4SSatish Balay       high++;
215792c4ed94SBarry Smith       /* shift up all the later entries in this row */
21589566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
215992c4ed94SBarry Smith       rp[i] = col;
21607dc0baabSHong Zhang       if (!A->structure_only) {
21619566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
21628a84c255SSatish Balay         bap = ap + bs2 * i;
21630e324ae4SSatish Balay         if (roworiented) {
2164dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2165ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2166dd9472c6SBarry Smith           }
21670e324ae4SSatish Balay         } else {
2168dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2169ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2170dd9472c6SBarry Smith           }
2171dd9472c6SBarry Smith         }
21727dc0baabSHong Zhang       }
2173f1241b54SBarry Smith     noinsert2:;
217492c4ed94SBarry Smith       low = i;
217592c4ed94SBarry Smith     }
217692c4ed94SBarry Smith     ailen[row] = nrow;
217792c4ed94SBarry Smith   }
21783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
217992c4ed94SBarry Smith }
218026e093fcSHong Zhang 
2181d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2182d71ae5a4SJacob Faibussowitsch {
2183584200bdSSatish Balay   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2184580bdb30SBarry Smith   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2185d0f46423SBarry Smith   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2186c1ac3661SBarry Smith   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
21873f1db9ecSBarry Smith   MatScalar   *aa    = a->a, *ap;
21883447b6efSHong Zhang   PetscReal    ratio = 0.6;
2189584200bdSSatish Balay 
21903a40ed3dSBarry Smith   PetscFunctionBegin;
2191d32568d8SPierre Jolivet   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2192584200bdSSatish Balay 
219343ee02c3SBarry Smith   if (m) rmax = ailen[0];
2194584200bdSSatish Balay   for (i = 1; i < mbs; i++) {
2195584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
2196584200bdSSatish Balay     fshift += imax[i - 1] - ailen[i - 1];
2197d402145bSBarry Smith     rmax = PetscMax(rmax, ailen[i]);
2198584200bdSSatish Balay     if (fshift) {
2199580bdb30SBarry Smith       ip = aj + ai[i];
2200580bdb30SBarry Smith       ap = aa + bs2 * ai[i];
2201584200bdSSatish Balay       N  = ailen[i];
22029566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(ip - fshift, ip, N));
220348a46eb9SPierre Jolivet       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2204672ba085SHong Zhang     }
2205584200bdSSatish Balay     ai[i] = ai[i - 1] + ailen[i - 1];
2206584200bdSSatish Balay   }
2207584200bdSSatish Balay   if (mbs) {
2208584200bdSSatish Balay     fshift += imax[mbs - 1] - ailen[mbs - 1];
2209584200bdSSatish Balay     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2210584200bdSSatish Balay   }
22117c565772SBarry Smith 
2212584200bdSSatish Balay   /* reset ilen and imax for each row */
22137c565772SBarry Smith   a->nonzerorowcnt = 0;
2214672ba085SHong Zhang   if (A->structure_only) {
22159566063dSJacob Faibussowitsch     PetscCall(PetscFree2(a->imax, a->ilen));
2216672ba085SHong Zhang   } else { /* !A->structure_only */
2217584200bdSSatish Balay     for (i = 0; i < mbs; i++) {
2218584200bdSSatish Balay       ailen[i] = imax[i] = ai[i + 1] - ai[i];
22197c565772SBarry Smith       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2220584200bdSSatish Balay     }
2221672ba085SHong Zhang   }
2222a7c10996SSatish Balay   a->nz = ai[mbs];
2223584200bdSSatish Balay 
2224584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
2225b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
22265f80ce2aSJacob Faibussowitsch   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
22279566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
22289566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
22299566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
223026fbe8dcSKarl Rupp 
22318e58a170SBarry Smith   A->info.mallocs += a->reallocs;
2232e2f3b5e9SSatish Balay   a->reallocs         = 0;
22330e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2234647a6520SHong Zhang   a->rmax             = rmax;
2235cf4441caSHong Zhang 
223648a46eb9SPierre Jolivet   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
22373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2238584200bdSSatish Balay }
2239584200bdSSatish Balay 
2240bea157c4SSatish Balay /*
2241bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
2242bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2243a5b23f4aSJose E. Roman    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2244bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
2245bea157c4SSatish Balay */
2246d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2247d71ae5a4SJacob Faibussowitsch {
2248ff6a9541SJacob Faibussowitsch   PetscInt j = 0;
22493a40ed3dSBarry Smith 
2250433994e6SBarry Smith   PetscFunctionBegin;
2251ff6a9541SJacob Faibussowitsch   for (PetscInt i = 0; i < n; j++) {
2252ff6a9541SJacob Faibussowitsch     PetscInt row = idx[i];
2253a5b23f4aSJose E. Roman     if (row % bs != 0) { /* Not the beginning of a block */
2254bea157c4SSatish Balay       sizes[j] = 1;
2255bea157c4SSatish Balay       i++;
2256e4fda26cSSatish Balay     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2257bea157c4SSatish Balay       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2258bea157c4SSatish Balay       i++;
22596aad120cSJose E. Roman     } else { /* Beginning of the block, so check if the complete block exists */
2260ff6a9541SJacob Faibussowitsch       PetscBool flg = PETSC_TRUE;
2261ff6a9541SJacob Faibussowitsch       for (PetscInt k = 1; k < bs; k++) {
2262bea157c4SSatish Balay         if (row + k != idx[i + k]) { /* break in the block */
2263bea157c4SSatish Balay           flg = PETSC_FALSE;
2264bea157c4SSatish Balay           break;
2265d9b7c43dSSatish Balay         }
2266bea157c4SSatish Balay       }
2267abc0a331SBarry Smith       if (flg) { /* No break in the bs */
2268bea157c4SSatish Balay         sizes[j] = bs;
2269bea157c4SSatish Balay         i += bs;
2270bea157c4SSatish Balay       } else {
2271bea157c4SSatish Balay         sizes[j] = 1;
2272bea157c4SSatish Balay         i++;
2273bea157c4SSatish Balay       }
2274bea157c4SSatish Balay     }
2275bea157c4SSatish Balay   }
2276bea157c4SSatish Balay   *bs_max = j;
22773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2278d9b7c43dSSatish Balay }
2279d9b7c43dSSatish Balay 
2280d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2281d71ae5a4SJacob Faibussowitsch {
2282d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2283f4df32b1SMatthew Knepley   PetscInt           i, j, k, count, *rows;
2284d0f46423SBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
228587828ca2SBarry Smith   PetscScalar        zero = 0.0;
22863f1db9ecSBarry Smith   MatScalar         *aa;
228797b48c8fSBarry Smith   const PetscScalar *xx;
228897b48c8fSBarry Smith   PetscScalar       *bb;
2289d9b7c43dSSatish Balay 
22903a40ed3dSBarry Smith   PetscFunctionBegin;
2291dd8e379bSPierre Jolivet   /* fix right-hand side if needed */
229297b48c8fSBarry Smith   if (x && b) {
22939566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
22949566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
2295ad540459SPierre Jolivet     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
22969566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
22979566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
229897b48c8fSBarry Smith   }
229997b48c8fSBarry Smith 
2300d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2301bea157c4SSatish Balay   /* allocate memory for rows,sizes */
23029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2303bea157c4SSatish Balay 
2304563b5814SBarry Smith   /* copy IS values to rows, and sort them */
230526fbe8dcSKarl Rupp   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
23069566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(is_n, rows));
230797b48c8fSBarry Smith 
2308a9817697SBarry Smith   if (baij->keepnonzeropattern) {
230926fbe8dcSKarl Rupp     for (i = 0; i < is_n; i++) sizes[i] = 1;
2310dffd3267SBarry Smith     bs_max = is_n;
2311dffd3267SBarry Smith   } else {
23129566063dSJacob Faibussowitsch     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2313e56f5c9eSBarry Smith     A->nonzerostate++;
2314dffd3267SBarry Smith   }
2315bea157c4SSatish Balay 
2316bea157c4SSatish Balay   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2317bea157c4SSatish Balay     row = rows[j];
23185f80ce2aSJacob Faibussowitsch     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2319bea157c4SSatish Balay     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2320835f2295SStefano Zampini     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2321a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2322d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2323bea157c4SSatish Balay         if (baij->ilen[row / bs] > 0) {
2324bea157c4SSatish Balay           baij->ilen[row / bs]       = 1;
2325bea157c4SSatish Balay           baij->j[baij->i[row / bs]] = row / bs;
232626fbe8dcSKarl Rupp 
23279566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(aa, count * bs));
2328a07cd24cSSatish Balay         }
2329563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
23309927e4dfSBarry Smith         for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2331f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2332bea157c4SSatish Balay         baij->ilen[row / bs] = 0;
2333f4df32b1SMatthew Knepley       } /* end (diag == 0.0) */
2334bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
23356bdcaf15SBarry Smith       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2336bea157c4SSatish Balay       for (k = 0; k < count; k++) {
2337d9b7c43dSSatish Balay         aa[0] = zero;
2338d9b7c43dSSatish Balay         aa += bs;
2339d9b7c43dSSatish Balay       }
23409927e4dfSBarry Smith       if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2341d9b7c43dSSatish Balay     }
2342bea157c4SSatish Balay   }
2343bea157c4SSatish Balay 
23449566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, sizes));
23459566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
23463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2347d9b7c43dSSatish Balay }
23481c351548SSatish Balay 
2349ff6a9541SJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2350d71ae5a4SJacob Faibussowitsch {
235197b48c8fSBarry Smith   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
235297b48c8fSBarry Smith   PetscInt           i, j, k, count;
235397b48c8fSBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
235497b48c8fSBarry Smith   PetscScalar        zero = 0.0;
235597b48c8fSBarry Smith   MatScalar         *aa;
235697b48c8fSBarry Smith   const PetscScalar *xx;
235797b48c8fSBarry Smith   PetscScalar       *bb;
235856777dd2SBarry Smith   PetscBool         *zeroed, vecs = PETSC_FALSE;
235997b48c8fSBarry Smith 
236097b48c8fSBarry Smith   PetscFunctionBegin;
2361dd8e379bSPierre Jolivet   /* fix right-hand side if needed */
236297b48c8fSBarry Smith   if (x && b) {
23639566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
23649566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
236556777dd2SBarry Smith     vecs = PETSC_TRUE;
236697b48c8fSBarry Smith   }
236797b48c8fSBarry Smith 
236897b48c8fSBarry Smith   /* zero the columns */
23699566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
237097b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
23715f80ce2aSJacob Faibussowitsch     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
237297b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
237397b48c8fSBarry Smith   }
237497b48c8fSBarry Smith   for (i = 0; i < A->rmap->N; i++) {
237597b48c8fSBarry Smith     if (!zeroed[i]) {
237697b48c8fSBarry Smith       row = i / bs;
237797b48c8fSBarry Smith       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
237897b48c8fSBarry Smith         for (k = 0; k < bs; k++) {
237997b48c8fSBarry Smith           col = bs * baij->j[j] + k;
238097b48c8fSBarry Smith           if (zeroed[col]) {
2381835f2295SStefano Zampini             aa = baij->a + j * bs2 + (i % bs) + bs * k;
238256777dd2SBarry Smith             if (vecs) bb[i] -= aa[0] * xx[col];
238397b48c8fSBarry Smith             aa[0] = 0.0;
238497b48c8fSBarry Smith           }
238597b48c8fSBarry Smith         }
238697b48c8fSBarry Smith       }
238756777dd2SBarry Smith     } else if (vecs) bb[i] = diag * xx[i];
238897b48c8fSBarry Smith   }
23899566063dSJacob Faibussowitsch   PetscCall(PetscFree(zeroed));
239056777dd2SBarry Smith   if (vecs) {
23919566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
23929566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
239356777dd2SBarry Smith   }
239497b48c8fSBarry Smith 
239597b48c8fSBarry Smith   /* zero the rows */
239697b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
239797b48c8fSBarry Smith     row   = is_idx[i];
239897b48c8fSBarry Smith     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2399835f2295SStefano Zampini     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
240097b48c8fSBarry Smith     for (k = 0; k < count; k++) {
240197b48c8fSBarry Smith       aa[0] = zero;
240297b48c8fSBarry Smith       aa += bs;
240397b48c8fSBarry Smith     }
2404dbbe0bcdSBarry Smith     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
240597b48c8fSBarry Smith   }
24069566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
24073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
240897b48c8fSBarry Smith }
240997b48c8fSBarry Smith 
2410d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2411d71ae5a4SJacob Faibussowitsch {
24122d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2413e2ee6c50SBarry Smith   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2414c1ac3661SBarry Smith   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2415d0f46423SBarry Smith   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2416c1ac3661SBarry Smith   PetscInt     ridx, cidx, bs2                 = a->bs2;
2417ace3abfcSBarry Smith   PetscBool    roworiented = a->roworiented;
2418d8cdefa3SHong Zhang   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
24192d61bbb3SSatish Balay 
24202d61bbb3SSatish Balay   PetscFunctionBegin;
24212d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over added rows */
2422085a36d4SBarry Smith     row  = im[k];
2423085a36d4SBarry Smith     brow = row / bs;
24245ef9f2a5SBarry Smith     if (row < 0) continue;
24256bdcaf15SBarry Smith     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
24268e3a54c0SPierre Jolivet     rp = PetscSafePointerPlusOffset(aj, ai[brow]);
24278e3a54c0SPierre Jolivet     if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
24282d61bbb3SSatish Balay     rmax = imax[brow];
24292d61bbb3SSatish Balay     nrow = ailen[brow];
24302d61bbb3SSatish Balay     low  = 0;
2431c71e6ed7SBarry Smith     high = nrow;
24322d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over added columns */
24335ef9f2a5SBarry Smith       if (in[l] < 0) continue;
24346bdcaf15SBarry Smith       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
24359371c9d4SSatish Balay       col  = in[l];
24369371c9d4SSatish Balay       bcol = col / bs;
24379371c9d4SSatish Balay       ridx = row % bs;
24389371c9d4SSatish Balay       cidx = col % bs;
2439672ba085SHong Zhang       if (!A->structure_only) {
24402d61bbb3SSatish Balay         if (roworiented) {
24415ef9f2a5SBarry Smith           value = v[l + k * n];
24422d61bbb3SSatish Balay         } else {
24432d61bbb3SSatish Balay           value = v[k + l * m];
24442d61bbb3SSatish Balay         }
2445672ba085SHong Zhang       }
24469371c9d4SSatish Balay       if (col <= lastcol) low = 0;
24479371c9d4SSatish Balay       else high = nrow;
2448e2ee6c50SBarry Smith       lastcol = col;
24492d61bbb3SSatish Balay       while (high - low > 7) {
24502d61bbb3SSatish Balay         t = (low + high) / 2;
24512d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
24522d61bbb3SSatish Balay         else low = t;
24532d61bbb3SSatish Balay       }
24542d61bbb3SSatish Balay       for (i = low; i < high; i++) {
24552d61bbb3SSatish Balay         if (rp[i] > bcol) break;
24562d61bbb3SSatish Balay         if (rp[i] == bcol) {
24578e3a54c0SPierre Jolivet           bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2458672ba085SHong Zhang           if (!A->structure_only) {
24592d61bbb3SSatish Balay             if (is == ADD_VALUES) *bap += value;
24602d61bbb3SSatish Balay             else *bap = value;
2461672ba085SHong Zhang           }
24622d61bbb3SSatish Balay           goto noinsert1;
24632d61bbb3SSatish Balay         }
24642d61bbb3SSatish Balay       }
24652d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
24665f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2467672ba085SHong Zhang       if (A->structure_only) {
2468672ba085SHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2469672ba085SHong Zhang       } else {
2470fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2471672ba085SHong Zhang       }
24729371c9d4SSatish Balay       N = nrow++ - 1;
24739371c9d4SSatish Balay       high++;
24742d61bbb3SSatish Balay       /* shift up all the later entries in this row */
24759566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
24762d61bbb3SSatish Balay       rp[i] = bcol;
2477580bdb30SBarry Smith       if (!A->structure_only) {
24789566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
24799566063dSJacob Faibussowitsch         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2480580bdb30SBarry Smith         ap[bs2 * i + bs * cidx + ridx] = value;
2481580bdb30SBarry Smith       }
2482085a36d4SBarry Smith       a->nz++;
24832d61bbb3SSatish Balay     noinsert1:;
24842d61bbb3SSatish Balay       low = i;
24852d61bbb3SSatish Balay     }
24862d61bbb3SSatish Balay     ailen[brow] = nrow;
24872d61bbb3SSatish Balay   }
24883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24892d61bbb3SSatish Balay }
24902d61bbb3SSatish Balay 
2491ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2492d71ae5a4SJacob Faibussowitsch {
24932d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
24942d61bbb3SSatish Balay   Mat          outA;
2495ace3abfcSBarry Smith   PetscBool    row_identity, col_identity;
24962d61bbb3SSatish Balay 
24972d61bbb3SSatish Balay   PetscFunctionBegin;
24985f80ce2aSJacob Faibussowitsch   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
24999566063dSJacob Faibussowitsch   PetscCall(ISIdentity(row, &row_identity));
25009566063dSJacob Faibussowitsch   PetscCall(ISIdentity(col, &col_identity));
25015f80ce2aSJacob Faibussowitsch   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
25022d61bbb3SSatish Balay 
25032d61bbb3SSatish Balay   outA            = inA;
2504d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
25059566063dSJacob Faibussowitsch   PetscCall(PetscFree(inA->solvertype));
25069566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
25072d61bbb3SSatish Balay 
25089566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)row));
25099566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
2510c3122656SLisandro Dalcin   a->row = row;
25119566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)col));
25129566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
2513c3122656SLisandro Dalcin   a->col = col;
2514c38d4ed2SBarry Smith 
2515c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
25169566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
25179566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2518c38d4ed2SBarry Smith 
25199566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2520aa624791SPierre Jolivet   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
25219566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(outA, inA, info));
25223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25232d61bbb3SSatish Balay }
2524d9b7c43dSSatish Balay 
2525ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2526d71ae5a4SJacob Faibussowitsch {
252727a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
252827a8da17SBarry Smith 
252927a8da17SBarry Smith   PetscFunctionBegin;
2530ff6a9541SJacob Faibussowitsch   baij->nz = baij->maxnz;
2531ff6a9541SJacob Faibussowitsch   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2532ff6a9541SJacob Faibussowitsch   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
25333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
253427a8da17SBarry Smith }
253527a8da17SBarry Smith 
253627a8da17SBarry Smith /*@
2537d8a51d2aSBarry Smith   MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
253827a8da17SBarry Smith 
253927a8da17SBarry Smith   Input Parameters:
254011a5261eSBarry Smith + mat     - the `MATSEQBAIJ` matrix
2541d8a51d2aSBarry Smith - indices - the block column indices
254227a8da17SBarry Smith 
254315091d37SBarry Smith   Level: advanced
254415091d37SBarry Smith 
254527a8da17SBarry Smith   Notes:
254627a8da17SBarry Smith   This can be called if you have precomputed the nonzero structure of the
254727a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
254811a5261eSBarry Smith   of the `MatSetValues()` operation.
254927a8da17SBarry Smith 
255027a8da17SBarry Smith   You MUST have set the correct numbers of nonzeros per row in the call to
255111a5261eSBarry Smith   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
255227a8da17SBarry Smith 
255311a5261eSBarry Smith   MUST be called before any calls to `MatSetValues()`
255427a8da17SBarry Smith 
25551cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
255627a8da17SBarry Smith @*/
2557d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2558d71ae5a4SJacob Faibussowitsch {
255927a8da17SBarry Smith   PetscFunctionBegin;
25600700a824SBarry Smith   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
25614f572ea9SToby Isaac   PetscAssertPointer(indices, 2);
2562810441c8SPierre Jolivet   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
25633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
256427a8da17SBarry Smith }
256527a8da17SBarry Smith 
256666976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2567d71ae5a4SJacob Faibussowitsch {
2568273d9f13SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2569c1ac3661SBarry Smith   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2570273d9f13SBarry Smith   PetscReal    atmp;
257187828ca2SBarry Smith   PetscScalar *x, zero = 0.0;
2572273d9f13SBarry Smith   MatScalar   *aa;
2573c1ac3661SBarry Smith   PetscInt     ncols, brow, krow, kcol;
2574273d9f13SBarry Smith 
2575273d9f13SBarry Smith   PetscFunctionBegin;
25765f80ce2aSJacob Faibussowitsch   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2577d0f46423SBarry Smith   bs  = A->rmap->bs;
2578273d9f13SBarry Smith   aa  = a->a;
2579273d9f13SBarry Smith   ai  = a->i;
2580273d9f13SBarry Smith   aj  = a->j;
2581273d9f13SBarry Smith   mbs = a->mbs;
2582273d9f13SBarry Smith 
25839566063dSJacob Faibussowitsch   PetscCall(VecSet(v, zero));
25849566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
25859566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(v, &n));
25865f80ce2aSJacob Faibussowitsch   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2587273d9f13SBarry Smith   for (i = 0; i < mbs; i++) {
25889371c9d4SSatish Balay     ncols = ai[1] - ai[0];
25899371c9d4SSatish Balay     ai++;
2590273d9f13SBarry Smith     brow = bs * i;
2591273d9f13SBarry Smith     for (j = 0; j < ncols; j++) {
2592273d9f13SBarry Smith       for (kcol = 0; kcol < bs; kcol++) {
2593273d9f13SBarry Smith         for (krow = 0; krow < bs; krow++) {
25949371c9d4SSatish Balay           atmp = PetscAbsScalar(*aa);
25959371c9d4SSatish Balay           aa++;
2596273d9f13SBarry Smith           row = brow + krow; /* row index */
25979371c9d4SSatish Balay           if (PetscAbsScalar(x[row]) < atmp) {
25989371c9d4SSatish Balay             x[row] = atmp;
25999371c9d4SSatish Balay             if (idx) idx[row] = bs * (*aj) + kcol;
26009371c9d4SSatish Balay           }
2601273d9f13SBarry Smith         }
2602273d9f13SBarry Smith       }
2603273d9f13SBarry Smith       aj++;
2604273d9f13SBarry Smith     }
2605273d9f13SBarry Smith   }
26069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
26073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2608273d9f13SBarry Smith }
2609273d9f13SBarry Smith 
2610eede4a3fSMark Adams static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2611eede4a3fSMark Adams {
2612eede4a3fSMark Adams   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2613eede4a3fSMark Adams   PetscInt     i, j, n, row, bs, *ai, mbs;
2614eede4a3fSMark Adams   PetscReal    atmp;
2615eede4a3fSMark Adams   PetscScalar *x, zero = 0.0;
2616eede4a3fSMark Adams   MatScalar   *aa;
2617eede4a3fSMark Adams   PetscInt     ncols, brow, krow, kcol;
2618eede4a3fSMark Adams 
2619eede4a3fSMark Adams   PetscFunctionBegin;
2620eede4a3fSMark Adams   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2621eede4a3fSMark Adams   bs  = A->rmap->bs;
2622eede4a3fSMark Adams   aa  = a->a;
2623eede4a3fSMark Adams   ai  = a->i;
2624eede4a3fSMark Adams   mbs = a->mbs;
2625eede4a3fSMark Adams 
2626eede4a3fSMark Adams   PetscCall(VecSet(v, zero));
2627eede4a3fSMark Adams   PetscCall(VecGetArrayWrite(v, &x));
2628eede4a3fSMark Adams   PetscCall(VecGetLocalSize(v, &n));
2629eede4a3fSMark Adams   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2630eede4a3fSMark Adams   for (i = 0; i < mbs; i++) {
2631eede4a3fSMark Adams     ncols = ai[1] - ai[0];
2632eede4a3fSMark Adams     ai++;
2633eede4a3fSMark Adams     brow = bs * i;
2634eede4a3fSMark Adams     for (j = 0; j < ncols; j++) {
2635eede4a3fSMark Adams       for (kcol = 0; kcol < bs; kcol++) {
2636eede4a3fSMark Adams         for (krow = 0; krow < bs; krow++) {
2637eede4a3fSMark Adams           atmp = PetscAbsScalar(*aa);
2638eede4a3fSMark Adams           aa++;
2639eede4a3fSMark Adams           row = brow + krow; /* row index */
2640eede4a3fSMark Adams           x[row] += atmp;
2641eede4a3fSMark Adams         }
2642eede4a3fSMark Adams       }
2643eede4a3fSMark Adams     }
2644eede4a3fSMark Adams   }
2645eede4a3fSMark Adams   PetscCall(VecRestoreArrayWrite(v, &x));
2646eede4a3fSMark Adams   PetscFunctionReturn(PETSC_SUCCESS);
2647eede4a3fSMark Adams }
2648eede4a3fSMark Adams 
264966976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2650d71ae5a4SJacob Faibussowitsch {
26513c896bc6SHong Zhang   PetscFunctionBegin;
26523c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
26533c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
26543c896bc6SHong Zhang     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
26553c896bc6SHong Zhang     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2656d88c0aacSHong Zhang     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
26573c896bc6SHong Zhang 
26585f80ce2aSJacob Faibussowitsch     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
26595f80ce2aSJacob Faibussowitsch     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
26609566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
26619566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)B));
26623c896bc6SHong Zhang   } else {
26639566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
26643c896bc6SHong Zhang   }
26653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
26663c896bc6SHong Zhang }
26673c896bc6SHong Zhang 
2668d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2669d71ae5a4SJacob Faibussowitsch {
2670f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
26716e111a19SKarl Rupp 
2672f2a5309cSSatish Balay   PetscFunctionBegin;
2673f2a5309cSSatish Balay   *array = a->a;
26743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2675f2a5309cSSatish Balay }
2676f2a5309cSSatish Balay 
2677d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2678d71ae5a4SJacob Faibussowitsch {
2679f2a5309cSSatish Balay   PetscFunctionBegin;
2680cda14afcSprj-   *array = NULL;
26813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2682f2a5309cSSatish Balay }
2683f2a5309cSSatish Balay 
2684d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2685d71ae5a4SJacob Faibussowitsch {
2686b264fe52SHong Zhang   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
268752768537SHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
268852768537SHong Zhang   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
268952768537SHong Zhang 
269052768537SHong Zhang   PetscFunctionBegin;
269152768537SHong Zhang   /* Set the number of nonzeros in the new matrix */
26929566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
26933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
269452768537SHong Zhang }
269552768537SHong Zhang 
2696d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2697d71ae5a4SJacob Faibussowitsch {
269842ee4b1aSHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
269931ce2d13SHong Zhang   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2700e838b9e7SJed Brown   PetscBLASInt one = 1;
270142ee4b1aSHong Zhang 
270242ee4b1aSHong Zhang   PetscFunctionBegin;
2703134adf20SPierre Jolivet   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2704134adf20SPierre Jolivet     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2705134adf20SPierre Jolivet     if (e) {
27069566063dSJacob Faibussowitsch       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2707134adf20SPierre Jolivet       if (e) {
27089566063dSJacob Faibussowitsch         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2709134adf20SPierre Jolivet         if (e) str = SAME_NONZERO_PATTERN;
2710134adf20SPierre Jolivet       }
2711134adf20SPierre Jolivet     }
271254c59aa7SJacob Faibussowitsch     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2713134adf20SPierre Jolivet   }
271442ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2715f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2716c5df96a5SBarry Smith     PetscBLASInt bnz;
27179566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2718792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
27199566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2720ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
27219566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
272242ee4b1aSHong Zhang   } else {
272352768537SHong Zhang     Mat       B;
272452768537SHong Zhang     PetscInt *nnz;
272554c59aa7SJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
27269566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
27279566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
27289566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
27299566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
27309566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
27319566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
27329566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
27339566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
27349566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
27359566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
27369566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
273742ee4b1aSHong Zhang   }
27383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
273942ee4b1aSHong Zhang }
274042ee4b1aSHong Zhang 
2741d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2742d71ae5a4SJacob Faibussowitsch {
2743ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
27442726fb6dSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27452726fb6dSPierre Jolivet   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
27462726fb6dSPierre Jolivet   MatScalar   *aa = a->a;
27472726fb6dSPierre Jolivet 
27482726fb6dSPierre Jolivet   PetscFunctionBegin;
27492726fb6dSPierre Jolivet   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
27503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2751ff6a9541SJacob Faibussowitsch #else
2752ff6a9541SJacob Faibussowitsch   (void)A;
2753ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2754ff6a9541SJacob Faibussowitsch #endif
27552726fb6dSPierre Jolivet }
27562726fb6dSPierre Jolivet 
2757ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2758d71ae5a4SJacob Faibussowitsch {
2759ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
276099cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
276199cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2762dd6ea824SBarry Smith   MatScalar   *aa = a->a;
276399cafbc1SBarry Smith 
276499cafbc1SBarry Smith   PetscFunctionBegin;
276599cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
27663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2767ff6a9541SJacob Faibussowitsch #else
2768ff6a9541SJacob Faibussowitsch   (void)A;
2769ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2770ff6a9541SJacob Faibussowitsch #endif
277199cafbc1SBarry Smith }
277299cafbc1SBarry Smith 
2773ff6a9541SJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2774d71ae5a4SJacob Faibussowitsch {
2775ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
277699cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
277799cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2778dd6ea824SBarry Smith   MatScalar   *aa = a->a;
277999cafbc1SBarry Smith 
278099cafbc1SBarry Smith   PetscFunctionBegin;
278199cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
27823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2783ff6a9541SJacob Faibussowitsch #else
2784ff6a9541SJacob Faibussowitsch   (void)A;
2785ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2786ff6a9541SJacob Faibussowitsch #endif
278799cafbc1SBarry Smith }
278899cafbc1SBarry Smith 
27893acb8795SBarry Smith /*
27902479783cSJose E. Roman     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
27913acb8795SBarry Smith */
2792ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2793d71ae5a4SJacob Faibussowitsch {
27943acb8795SBarry Smith   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
27953acb8795SBarry Smith   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
27963acb8795SBarry Smith   PetscInt     nz = a->i[m], row, *jj, mr, col;
27973acb8795SBarry Smith 
27983acb8795SBarry Smith   PetscFunctionBegin;
27993acb8795SBarry Smith   *nn = n;
28003ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
28015f80ce2aSJacob Faibussowitsch   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
28029566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28039566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28049566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28053acb8795SBarry Smith   jj = a->j;
2806ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
28073acb8795SBarry Smith   cia[0] = oshift;
2808ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28099566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
28103acb8795SBarry Smith   jj = a->j;
28113acb8795SBarry Smith   for (row = 0; row < m; row++) {
28123acb8795SBarry Smith     mr = a->i[row + 1] - a->i[row];
28133acb8795SBarry Smith     for (i = 0; i < mr; i++) {
28143acb8795SBarry Smith       col = *jj++;
281526fbe8dcSKarl Rupp 
28163acb8795SBarry Smith       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
28173acb8795SBarry Smith     }
28183acb8795SBarry Smith   }
28199566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
28209371c9d4SSatish Balay   *ia = cia;
28219371c9d4SSatish Balay   *ja = cja;
28223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28233acb8795SBarry Smith }
28243acb8795SBarry Smith 
2825ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2826d71ae5a4SJacob Faibussowitsch {
28273acb8795SBarry Smith   PetscFunctionBegin;
28283ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
28299566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ia));
28309566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ja));
28313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28323acb8795SBarry Smith }
28333acb8795SBarry Smith 
2834525d23c0SHong Zhang /*
2835525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2836525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2837040ebd07SHong Zhang  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2838525d23c0SHong Zhang  */
2839d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2840d71ae5a4SJacob Faibussowitsch {
2841525d23c0SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2842c0349474SHong Zhang   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2843525d23c0SHong Zhang   PetscInt     nz = a->i[m], row, *jj, mr, col;
2844525d23c0SHong Zhang   PetscInt    *cspidx;
2845f6d58c54SBarry Smith 
2846f6d58c54SBarry Smith   PetscFunctionBegin;
2847525d23c0SHong Zhang   *nn = n;
28483ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2849f6d58c54SBarry Smith 
28509566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cspidx));
2854525d23c0SHong Zhang   jj = a->j;
2855ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2856525d23c0SHong Zhang   cia[0] = oshift;
2857ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28589566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
2859525d23c0SHong Zhang   jj = a->j;
2860525d23c0SHong Zhang   for (row = 0; row < m; row++) {
2861525d23c0SHong Zhang     mr = a->i[row + 1] - a->i[row];
2862525d23c0SHong Zhang     for (i = 0; i < mr; i++) {
2863525d23c0SHong Zhang       col                                         = *jj++;
2864525d23c0SHong Zhang       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2865525d23c0SHong Zhang       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2866525d23c0SHong Zhang     }
2867525d23c0SHong Zhang   }
28689566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
2869071fcb05SBarry Smith   *ia    = cia;
2870071fcb05SBarry Smith   *ja    = cja;
2871525d23c0SHong Zhang   *spidx = cspidx;
28723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2873f6d58c54SBarry Smith }
2874f6d58c54SBarry Smith 
2875d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2876d71ae5a4SJacob Faibussowitsch {
2877525d23c0SHong Zhang   PetscFunctionBegin;
28789566063dSJacob Faibussowitsch   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
28799566063dSJacob Faibussowitsch   PetscCall(PetscFree(*spidx));
28803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2881f6d58c54SBarry Smith }
288299cafbc1SBarry Smith 
288366976f2fSJacob Faibussowitsch static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2884d71ae5a4SJacob Faibussowitsch {
28857d68702bSBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
28867d68702bSBarry Smith 
28877d68702bSBarry Smith   PetscFunctionBegin;
288848a46eb9SPierre Jolivet   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
28899566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
28903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28917d68702bSBarry Smith }
28927d68702bSBarry Smith 
289317ea310bSPierre Jolivet PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
289417ea310bSPierre Jolivet {
289517ea310bSPierre Jolivet   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
289617ea310bSPierre Jolivet   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
289717ea310bSPierre Jolivet   PetscInt     m = A->rmap->N, *ailen = a->ilen;
289817ea310bSPierre Jolivet   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
289917ea310bSPierre Jolivet   MatScalar   *aa = a->a, *ap;
290017ea310bSPierre Jolivet   PetscBool    zero;
290117ea310bSPierre Jolivet 
290217ea310bSPierre Jolivet   PetscFunctionBegin;
290317ea310bSPierre Jolivet   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
290417ea310bSPierre Jolivet   if (m) rmax = ailen[0];
290517ea310bSPierre Jolivet   for (i = 1; i <= mbs; i++) {
290617ea310bSPierre Jolivet     for (k = ai[i - 1]; k < ai[i]; k++) {
290717ea310bSPierre Jolivet       zero = PETSC_TRUE;
290817ea310bSPierre Jolivet       ap   = aa + bs2 * k;
290917ea310bSPierre Jolivet       for (j = 0; j < bs2 && zero; j++) {
291017ea310bSPierre Jolivet         if (ap[j] != 0.0) zero = PETSC_FALSE;
291117ea310bSPierre Jolivet       }
291217ea310bSPierre Jolivet       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
291317ea310bSPierre Jolivet       else {
291417ea310bSPierre Jolivet         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
291517ea310bSPierre Jolivet         aj[k - fshift] = aj[k];
291617ea310bSPierre Jolivet         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
291717ea310bSPierre Jolivet       }
291817ea310bSPierre Jolivet     }
291917ea310bSPierre Jolivet     ai[i - 1] -= fshift_prev;
292017ea310bSPierre Jolivet     fshift_prev  = fshift;
292117ea310bSPierre Jolivet     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
292217ea310bSPierre Jolivet     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
292317ea310bSPierre Jolivet     rmax = PetscMax(rmax, ailen[i - 1]);
292417ea310bSPierre Jolivet   }
292517ea310bSPierre Jolivet   if (fshift) {
292617ea310bSPierre Jolivet     if (mbs) {
292717ea310bSPierre Jolivet       ai[mbs] -= fshift;
292817ea310bSPierre Jolivet       a->nz = ai[mbs];
292917ea310bSPierre Jolivet     }
293017ea310bSPierre Jolivet     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
293117ea310bSPierre Jolivet     A->nonzerostate++;
293217ea310bSPierre Jolivet     A->info.nz_unneeded += (PetscReal)fshift;
293317ea310bSPierre Jolivet     a->rmax = rmax;
293417ea310bSPierre Jolivet     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
293517ea310bSPierre Jolivet     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
293617ea310bSPierre Jolivet   }
293717ea310bSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
293817ea310bSPierre Jolivet }
293917ea310bSPierre Jolivet 
/*
  Function table for the SeqBAIJ matrix type.

  The initializer is positional: every entry fills the next member of struct _MatOps, so
  entries must never be inserted, removed or reordered without matching that structure.
  The numeric comments mark every fifth slot index. NULL leaves a slot without a routine
  in this table.
*/
2940dec0b466SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2941cc2dc46cSBarry Smith                                        MatGetRow_SeqBAIJ,
2942cc2dc46cSBarry Smith                                        MatRestoreRow_SeqBAIJ,
2943cc2dc46cSBarry Smith                                        MatMult_SeqBAIJ_N,
294497304618SKris Buschelman                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
29457c922b88SBarry Smith                                        MatMultTranspose_SeqBAIJ,
29467c922b88SBarry Smith                                        MatMultTransposeAdd_SeqBAIJ,
2947f4259b30SLisandro Dalcin                                        NULL,
2948f4259b30SLisandro Dalcin                                        NULL,
2949f4259b30SLisandro Dalcin                                        NULL,
2950f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
2951cc2dc46cSBarry Smith                                        MatLUFactor_SeqBAIJ,
2952f4259b30SLisandro Dalcin                                        NULL,
2953f4259b30SLisandro Dalcin                                        NULL,
2954f2501298SSatish Balay                                        MatTranspose_SeqBAIJ,
295597304618SKris Buschelman                                        /* 15*/ MatGetInfo_SeqBAIJ,
2956cc2dc46cSBarry Smith                                        MatEqual_SeqBAIJ,
2957cc2dc46cSBarry Smith                                        MatGetDiagonal_SeqBAIJ,
2958cc2dc46cSBarry Smith                                        MatDiagonalScale_SeqBAIJ,
2959cc2dc46cSBarry Smith                                        MatNorm_SeqBAIJ,
2960f4259b30SLisandro Dalcin                                        /* 20*/ NULL,
2961cc2dc46cSBarry Smith                                        MatAssemblyEnd_SeqBAIJ,
2962cc2dc46cSBarry Smith                                        MatSetOption_SeqBAIJ,
2963cc2dc46cSBarry Smith                                        MatZeroEntries_SeqBAIJ,
2964d519adbfSMatthew Knepley                                        /* 24*/ MatZeroRows_SeqBAIJ,
2965f4259b30SLisandro Dalcin                                        NULL,
2966f4259b30SLisandro Dalcin                                        NULL,
2967f4259b30SLisandro Dalcin                                        NULL,
2968f4259b30SLisandro Dalcin                                        NULL,
296926cec326SBarry Smith                                        /* 29*/ MatSetUp_Seq_Hash,
2970f4259b30SLisandro Dalcin                                        NULL,
2971f4259b30SLisandro Dalcin                                        NULL,
2972f4259b30SLisandro Dalcin                                        NULL,
2973f4259b30SLisandro Dalcin                                        NULL,
2974d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_SeqBAIJ,
2975f4259b30SLisandro Dalcin                                        NULL,
2976f4259b30SLisandro Dalcin                                        NULL,
2977cc2dc46cSBarry Smith                                        MatILUFactor_SeqBAIJ,
2978f4259b30SLisandro Dalcin                                        NULL,
2979d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_SeqBAIJ,
29807dae84e0SHong Zhang                                        MatCreateSubMatrices_SeqBAIJ,
2981cc2dc46cSBarry Smith                                        MatIncreaseOverlap_SeqBAIJ,
2982cc2dc46cSBarry Smith                                        MatGetValues_SeqBAIJ,
29833c896bc6SHong Zhang                                        MatCopy_SeqBAIJ,
2984f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
2985cc2dc46cSBarry Smith                                        MatScale_SeqBAIJ,
29867d68702bSBarry Smith                                        MatShift_SeqBAIJ,
2987f4259b30SLisandro Dalcin                                        NULL,
298897b48c8fSBarry Smith                                        MatZeroRowsColumns_SeqBAIJ,
2989f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
29903b2fbd54SBarry Smith                                        MatGetRowIJ_SeqBAIJ,
299192c4ed94SBarry Smith                                        MatRestoreRowIJ_SeqBAIJ,
29923acb8795SBarry Smith                                        MatGetColumnIJ_SeqBAIJ,
29933acb8795SBarry Smith                                        MatRestoreColumnIJ_SeqBAIJ,
299493dfae19SHong Zhang                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
2995f4259b30SLisandro Dalcin                                        NULL,
2996f4259b30SLisandro Dalcin                                        NULL,
2997090001bdSToby Isaac                                        NULL,
2998d3825aa8SBarry Smith                                        MatSetValuesBlocked_SeqBAIJ,
29997dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3000b9b97703SBarry Smith                                        MatDestroy_SeqBAIJ,
3001b9b97703SBarry Smith                                        MatView_SeqBAIJ,
3002f4259b30SLisandro Dalcin                                        NULL,
3003f4259b30SLisandro Dalcin                                        NULL,
3004f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
3005f4259b30SLisandro Dalcin                                        NULL,
3006f4259b30SLisandro Dalcin                                        NULL,
3007f4259b30SLisandro Dalcin                                        NULL,
30088bb0f5c6SPierre Jolivet                                        MatGetRowMaxAbs_SeqBAIJ,
30098bb0f5c6SPierre Jolivet                                        /* 69*/ NULL,
3010c87e5d42SMatthew Knepley                                        MatConvert_Basic,
3011f4259b30SLisandro Dalcin                                        NULL,
3012f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
3013f4259b30SLisandro Dalcin                                        NULL,
30148bb0f5c6SPierre Jolivet                                        /* 74*/ NULL,
3015f4259b30SLisandro Dalcin                                        NULL,
3016f4259b30SLisandro Dalcin                                        NULL,
3017f4259b30SLisandro Dalcin                                        NULL,
30185bba2384SShri Abhyankar                                        MatLoad_SeqBAIJ,
30198bb0f5c6SPierre Jolivet                                        /* 79*/ NULL,
30208bb0f5c6SPierre Jolivet                                        NULL,
30218bb0f5c6SPierre Jolivet                                        NULL,
30228bb0f5c6SPierre Jolivet                                        NULL,
30238bb0f5c6SPierre Jolivet                                        NULL,
3024f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
3025f4259b30SLisandro Dalcin                                        NULL,
3026f4259b30SLisandro Dalcin                                        NULL,
3027f4259b30SLisandro Dalcin                                        NULL,
3028f4259b30SLisandro Dalcin                                        NULL,
3029f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
3030f4259b30SLisandro Dalcin                                        NULL,
3031f4259b30SLisandro Dalcin                                        NULL,
3032f4259b30SLisandro Dalcin                                        NULL,
30338bb0f5c6SPierre Jolivet                                        MatConjugate_SeqBAIJ,
3034f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
3035f4259b30SLisandro Dalcin                                        NULL,
30368bb0f5c6SPierre Jolivet                                        MatRealPart_SeqBAIJ,
30378bb0f5c6SPierre Jolivet                                        MatImaginaryPart_SeqBAIJ,
3038f4259b30SLisandro Dalcin                                        NULL,
3039f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
3040f4259b30SLisandro Dalcin                                        NULL,
3041f4259b30SLisandro Dalcin                                        NULL,
3042f4259b30SLisandro Dalcin                                        NULL,
30438bb0f5c6SPierre Jolivet                                        NULL,
3044*421480d9SBarry Smith                                        /*104*/ NULL,
30458bb0f5c6SPierre Jolivet                                        NULL,
30468bb0f5c6SPierre Jolivet                                        NULL,
3047f4259b30SLisandro Dalcin                                        NULL,
3048f4259b30SLisandro Dalcin                                        NULL,
3049f4259b30SLisandro Dalcin                                        /*109*/ NULL,
3050f4259b30SLisandro Dalcin                                        NULL,
3051547795f9SHong Zhang                                        MatMultHermitianTranspose_SeqBAIJ,
3052d6037b41SHong Zhang                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3053f4259b30SLisandro Dalcin                                        NULL,
3054*421480d9SBarry Smith                                        /*114*/ NULL,
3055857cbf51SRichard Tran Mills                                        MatGetColumnReductions_SeqBAIJ,
30563964eb88SJed Brown                                        MatInvertBlockDiagonal_SeqBAIJ,
3057f4259b30SLisandro Dalcin                                        NULL,
3058*421480d9SBarry Smith                                        NULL,
30598bb0f5c6SPierre Jolivet                                        /*119*/ NULL,
3060f4259b30SLisandro Dalcin                                        NULL,
3061f4259b30SLisandro Dalcin                                        NULL,
3062f4259b30SLisandro Dalcin                                        NULL,
3063f4259b30SLisandro Dalcin                                        NULL,
30648bb0f5c6SPierre Jolivet                                        /*124*/ NULL,
30658bb0f5c6SPierre Jolivet                                        NULL,
30668bb0f5c6SPierre Jolivet                                        MatSetBlockSizes_Default,
30678bb0f5c6SPierre Jolivet                                        NULL,
3068*421480d9SBarry Smith                                        MatFDColoringSetUp_SeqXAIJ,
3069*421480d9SBarry Smith                                        /*129*/ NULL,
30708bb0f5c6SPierre Jolivet                                        MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
30718bb0f5c6SPierre Jolivet                                        MatDestroySubMatrices_SeqBAIJ,
30728bb0f5c6SPierre Jolivet                                        NULL,
3073f4259b30SLisandro Dalcin                                        NULL,
3074*421480d9SBarry Smith                                        /*134*/ NULL,
3075f4259b30SLisandro Dalcin                                        NULL,
3076eede4a3fSMark Adams                                        MatEliminateZeros_SeqBAIJ,
30774cc2b5b5SPierre Jolivet                                        MatGetRowSumAbs_SeqBAIJ,
307842ce410bSJunchao Zhang                                        NULL,
3079*421480d9SBarry Smith                                        /*139*/ NULL,
308042ce410bSJunchao Zhang                                        NULL,
308103db1824SAlex Lindsay                                        MatCopyHashToXAIJ_Seq_Hash,
3082c2be7ffeSStefano Zampini                                        NULL,
308303db1824SAlex Lindsay                                        NULL};
30842593348eSBarry Smith 
3085ff6a9541SJacob Faibussowitsch static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3086d71ae5a4SJacob Faibussowitsch {
30873e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
30888ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
30893e90b805SBarry Smith 
30903e90b805SBarry Smith   PetscFunctionBegin;
30915f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
30923e90b805SBarry Smith 
30933e90b805SBarry Smith   /* allocate space for values if not already there */
3094ff6a9541SJacob Faibussowitsch   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
30953e90b805SBarry Smith 
30963e90b805SBarry Smith   /* copy values over */
30979566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
30983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
30993e90b805SBarry Smith }
31003e90b805SBarry Smith 
3101ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3102d71ae5a4SJacob Faibussowitsch {
31033e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
31048ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
31053e90b805SBarry Smith 
31063e90b805SBarry Smith   PetscFunctionBegin;
31075f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
31085f80ce2aSJacob Faibussowitsch   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
31093e90b805SBarry Smith 
31103e90b805SBarry Smith   /* copy values over */
31119566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
31123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31133e90b805SBarry Smith }
31143e90b805SBarry Smith 
/* Prototypes only: judging by their names these convert a SeqBAIJ matrix to SeqAIJ and to SeqSBAIJ; neither is defined in this part of the file */
3115cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3116cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3117273d9f13SBarry Smith 
3118f9663b93SPierre Jolivet PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3119d71ae5a4SJacob Faibussowitsch {
3120ad79cf63SBarry Smith   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3121535b19f3SBarry Smith   PetscInt     i, mbs, nbs, bs2;
31228afaa268SBarry Smith   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3123a23d5eceSKris Buschelman 
3124a23d5eceSKris Buschelman   PetscFunctionBegin;
3125ad79cf63SBarry Smith   if (B->hash_active) {
3126ad79cf63SBarry Smith     PetscInt bs;
3127aea10558SJacob Faibussowitsch     B->ops[0] = b->cops;
3128ad79cf63SBarry Smith     PetscCall(PetscHMapIJVDestroy(&b->ht));
3129ad79cf63SBarry Smith     PetscCall(MatGetBlockSize(B, &bs));
3130ad79cf63SBarry Smith     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3131ad79cf63SBarry Smith     PetscCall(PetscFree(b->dnz));
3132ad79cf63SBarry Smith     PetscCall(PetscFree(b->bdnz));
3133ad79cf63SBarry Smith     B->hash_active = PETSC_FALSE;
3134ad79cf63SBarry Smith   }
31352576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3136ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
3137ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
3138ab93d7beSBarry Smith     nz             = 0;
3139ab93d7beSBarry Smith   }
31408c07d4e3SBarry Smith 
314158b7e2c1SStefano Zampini   PetscCall(MatSetBlockSize(B, bs));
31429566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
31439566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
31449566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3145899cda47SBarry Smith 
3146899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
3147899cda47SBarry Smith 
3148d0f46423SBarry Smith   mbs = B->rmap->n / bs;
3149d0f46423SBarry Smith   nbs = B->cmap->n / bs;
3150a23d5eceSKris Buschelman   bs2 = bs * bs;
3151a23d5eceSKris Buschelman 
31525f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3153a23d5eceSKris Buschelman 
3154a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
31555f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3156a23d5eceSKris Buschelman   if (nnz) {
3157a23d5eceSKris Buschelman     for (i = 0; i < mbs; i++) {
31585f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
31595f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3160a23d5eceSKris Buschelman     }
3161a23d5eceSKris Buschelman   }
3162a23d5eceSKris Buschelman 
3163d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
31649566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3165d0609cedSBarry Smith   PetscOptionsEnd();
31668c07d4e3SBarry Smith 
3167a23d5eceSKris Buschelman   if (!flg) {
3168a23d5eceSKris Buschelman     switch (bs) {
3169a23d5eceSKris Buschelman     case 1:
3170a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
3171a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3172a23d5eceSKris Buschelman       break;
3173a23d5eceSKris Buschelman     case 2:
3174a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
3175a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3176a23d5eceSKris Buschelman       break;
3177a23d5eceSKris Buschelman     case 3:
3178a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
3179a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3180a23d5eceSKris Buschelman       break;
3181a23d5eceSKris Buschelman     case 4:
3182a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
3183a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3184a23d5eceSKris Buschelman       break;
3185a23d5eceSKris Buschelman     case 5:
3186a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
3187a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3188a23d5eceSKris Buschelman       break;
3189a23d5eceSKris Buschelman     case 6:
3190a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
3191a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3192a23d5eceSKris Buschelman       break;
3193a23d5eceSKris Buschelman     case 7:
3194a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
3195a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3196a23d5eceSKris Buschelman       break;
31979371c9d4SSatish Balay     case 9: {
31986679dcc1SBarry Smith       PetscInt version = 1;
31999566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32006679dcc1SBarry Smith       switch (version) {
32015f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
32026679dcc1SBarry Smith       case 1:
320396e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
320496e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3205835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32066679dcc1SBarry Smith         break;
32076679dcc1SBarry Smith #endif
32086679dcc1SBarry Smith       default:
320996e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_N;
321096e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3211835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
321296e086a2SDaniel Kokron         break;
32136679dcc1SBarry Smith       }
32146679dcc1SBarry Smith       break;
32156679dcc1SBarry Smith     }
3216ebada01fSBarry Smith     case 11:
3217ebada01fSBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_11;
3218ebada01fSBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3219ebada01fSBarry Smith       break;
32209371c9d4SSatish Balay     case 12: {
32216679dcc1SBarry Smith       PetscInt version = 1;
32229566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32236679dcc1SBarry Smith       switch (version) {
32246679dcc1SBarry Smith       case 1:
32256679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
32266679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3227835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32288ab949d8SShri Abhyankar         break;
32296679dcc1SBarry Smith       case 2:
32306679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
32316679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3232835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32336679dcc1SBarry Smith         break;
32346679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
32356679dcc1SBarry Smith       case 3:
32366679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
32376679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3238835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32396679dcc1SBarry Smith         break;
32406679dcc1SBarry Smith #endif
3241a23d5eceSKris Buschelman       default:
3242a23d5eceSKris Buschelman         B->ops->mult    = MatMult_SeqBAIJ_N;
3243a23d5eceSKris Buschelman         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3244835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32456679dcc1SBarry Smith         break;
32466679dcc1SBarry Smith       }
32476679dcc1SBarry Smith       break;
32486679dcc1SBarry Smith     }
32499371c9d4SSatish Balay     case 15: {
32506679dcc1SBarry Smith       PetscInt version = 1;
32519566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32526679dcc1SBarry Smith       switch (version) {
32536679dcc1SBarry Smith       case 1:
32546679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3255835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32566679dcc1SBarry Smith         break;
32576679dcc1SBarry Smith       case 2:
32586679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3259835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32606679dcc1SBarry Smith         break;
32616679dcc1SBarry Smith       case 3:
32626679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3263835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32646679dcc1SBarry Smith         break;
32656679dcc1SBarry Smith       case 4:
32666679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3267835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32686679dcc1SBarry Smith         break;
32696679dcc1SBarry Smith       default:
32706679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_N;
3271835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32726679dcc1SBarry Smith         break;
32736679dcc1SBarry Smith       }
32746679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
32756679dcc1SBarry Smith       break;
32766679dcc1SBarry Smith     }
32776679dcc1SBarry Smith     default:
32786679dcc1SBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_N;
32796679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3280835f2295SStefano Zampini       PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3281a23d5eceSKris Buschelman       break;
3282a23d5eceSKris Buschelman     }
3283a23d5eceSKris Buschelman   }
3284e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
3285a23d5eceSKris Buschelman   b->mbs      = mbs;
3286a23d5eceSKris Buschelman   b->nbs      = nbs;
3287ab93d7beSBarry Smith   if (!skipallocation) {
32882ee49352SLisandro Dalcin     if (!b->imax) {
32899566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
329026fbe8dcSKarl Rupp 
32914fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
32922ee49352SLisandro Dalcin     }
3293ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
329426fbe8dcSKarl Rupp     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3295a23d5eceSKris Buschelman     if (!nnz) {
3296a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3297c62bd62aSJed Brown       else if (nz < 0) nz = 1;
32985d2a9ed1SStefano Zampini       nz = PetscMin(nz, nbs);
3299a23d5eceSKris Buschelman       for (i = 0; i < mbs; i++) b->imax[i] = nz;
33009566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, mbs, &nz));
3301a23d5eceSKris Buschelman     } else {
3302c73702f5SBarry Smith       PetscInt64 nz64 = 0;
33039371c9d4SSatish Balay       for (i = 0; i < mbs; i++) {
33049371c9d4SSatish Balay         b->imax[i] = nnz[i];
33059371c9d4SSatish Balay         nz64 += nnz[i];
33069371c9d4SSatish Balay       }
33079566063dSJacob Faibussowitsch       PetscCall(PetscIntCast(nz64, &nz));
3308a23d5eceSKris Buschelman     }
3309a23d5eceSKris Buschelman 
3310a23d5eceSKris Buschelman     /* allocate the matrix space */
33119566063dSJacob Faibussowitsch     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
33129f0612e4SBarry Smith     PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
33139f0612e4SBarry Smith     PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3314672ba085SHong Zhang     if (B->structure_only) {
33159f0612e4SBarry Smith       b->free_a = PETSC_FALSE;
3316672ba085SHong Zhang     } else {
33176679dcc1SBarry Smith       PetscInt nzbs2 = 0;
33189566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
33199f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
33209f0612e4SBarry Smith       b->free_a = PETSC_TRUE;
33219566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(b->a, nz * bs2));
3322672ba085SHong Zhang     }
3323672ba085SHong Zhang     b->free_ij = PETSC_TRUE;
33249f0612e4SBarry Smith     PetscCall(PetscArrayzero(b->j, nz));
3325672ba085SHong Zhang 
3326a23d5eceSKris Buschelman     b->i[0] = 0;
3327ad540459SPierre Jolivet     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3328e811da20SHong Zhang   } else {
3329e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3330e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3331ab93d7beSBarry Smith   }
3332a23d5eceSKris Buschelman 
3333a23d5eceSKris Buschelman   b->bs2              = bs2;
3334a23d5eceSKris Buschelman   b->mbs              = mbs;
3335a23d5eceSKris Buschelman   b->nz               = 0;
3336b32cb4a7SJed Brown   b->maxnz            = nz;
3337b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3338cb7b82ddSBarry Smith   B->was_assembled    = PETSC_FALSE;
3339cb7b82ddSBarry Smith   B->assembled        = PETSC_FALSE;
33409566063dSJacob Faibussowitsch   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
33413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3342a23d5eceSKris Buschelman }
3343a23d5eceSKris Buschelman 
334466976f2fSJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3345d71ae5a4SJacob Faibussowitsch {
3346725b52f3SLisandro Dalcin   PetscInt     i, m, nz, nz_max = 0, *nnz;
3347f4259b30SLisandro Dalcin   PetscScalar *values      = NULL;
3348d47bf9aaSJed Brown   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3349725b52f3SLisandro Dalcin 
3350725b52f3SLisandro Dalcin   PetscFunctionBegin;
33515f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
33529566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
33539566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
33549566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
33559566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
33569566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3357d0f46423SBarry Smith   m = B->rmap->n / bs;
3358725b52f3SLisandro Dalcin 
33595f80ce2aSJacob Faibussowitsch   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
33609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &nnz));
3361725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3362cf12db73SBarry Smith     nz = ii[i + 1] - ii[i];
33635f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3364725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3365725b52f3SLisandro Dalcin     nnz[i] = nz;
3366725b52f3SLisandro Dalcin   }
33679566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
33689566063dSJacob Faibussowitsch   PetscCall(PetscFree(nnz));
3369725b52f3SLisandro Dalcin 
3370725b52f3SLisandro Dalcin   values = (PetscScalar *)V;
337148a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3372725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3373cf12db73SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
3374cf12db73SBarry Smith     const PetscInt *icols = jj + ii[i];
3375bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {
3376cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
33779566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
33783adadaf3SJed Brown     } else {
33793adadaf3SJed Brown       PetscInt j;
33803adadaf3SJed Brown       for (j = 0; j < ncols; j++) {
33813adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
33829566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
33833adadaf3SJed Brown       }
33843adadaf3SJed Brown     }
3385725b52f3SLisandro Dalcin   }
33869566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
33879566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
33889566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
33899566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
33903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3391725b52f3SLisandro Dalcin }
3392725b52f3SLisandro Dalcin 
3393cda14afcSprj- /*@C
339411a5261eSBarry Smith   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3395cda14afcSprj- 
3396cda14afcSprj-   Not Collective
3397cda14afcSprj- 
3398cda14afcSprj-   Input Parameter:
3399fe59aa6dSJacob Faibussowitsch . A - a `MATSEQBAIJ` matrix
3400cda14afcSprj- 
3401cda14afcSprj-   Output Parameter:
3402cda14afcSprj- . array - pointer to the data
3403cda14afcSprj- 
3404cda14afcSprj-   Level: intermediate
3405cda14afcSprj- 
34061cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3407cda14afcSprj- @*/
34085d83a8b1SBarry Smith PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3409d71ae5a4SJacob Faibussowitsch {
3410cda14afcSprj-   PetscFunctionBegin;
  /* Forward (A, array) to the function composed on A under the name "MatSeqBAIJGetArray_C";
     MatCreate_SeqBAIJ() composes MatSeqBAIJGetArray_SeqBAIJ() under that name. */
3411cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
34123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3413cda14afcSprj- }
3414cda14afcSprj- 
3415cda14afcSprj- /*@C
341611a5261eSBarry Smith   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3417cda14afcSprj- 
3418cda14afcSprj-   Not Collective
3419cda14afcSprj- 
3420cda14afcSprj-   Input Parameters:
3421fe59aa6dSJacob Faibussowitsch + A     - a `MATSEQBAIJ` matrix
3422cda14afcSprj- - array - pointer to the data
3423cda14afcSprj- 
3424cda14afcSprj-   Level: intermediate
3425cda14afcSprj- 
34261cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3427cda14afcSprj- @*/
34285d83a8b1SBarry Smith PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3429d71ae5a4SJacob Faibussowitsch {
3430cda14afcSprj-   PetscFunctionBegin;
  /* Forward (A, array) to the function composed on A under the name "MatSeqBAIJRestoreArray_C";
     MatCreate_SeqBAIJ() composes MatSeqBAIJRestoreArray_SeqBAIJ() under that name. */
3431cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
34323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3433cda14afcSprj- }
3434cda14afcSprj- 
34350bad9183SKris Buschelman /*MC
3436fafad747SKris Buschelman    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
34370bad9183SKris Buschelman    block sparse compressed row format.
34380bad9183SKris Buschelman 
34390bad9183SKris Buschelman    Options Database Keys:
344020f4b53cSBarry Smith + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
34416679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
34420bad9183SKris Buschelman 
34430bad9183SKris Buschelman    Level: beginner
34440cd7f59aSBarry Smith 
34450cd7f59aSBarry Smith    Notes:
344611a5261eSBarry Smith    `MatSetOption`(A, `MAT_STRUCTURE_ONLY`, `PETSC_TRUE`) may be called for this matrix type. In this case no
344711a5261eSBarry Smith    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
34480bad9183SKris Buschelman 
34492ef1f0ffSBarry Smith    Run with `-info` to see what version of the matrix-vector product is being used
34506679dcc1SBarry Smith 
34511cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
34520bad9183SKris Buschelman M*/
34530bad9183SKris Buschelman 
3454cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3455b24902e0SBarry Smith 
3456d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3457d71ae5a4SJacob Faibussowitsch {
3458c1ac3661SBarry Smith   PetscMPIInt  size;
3459b6490206SBarry Smith   Mat_SeqBAIJ *b;
34603b2fbd54SBarry Smith 
34613a40ed3dSBarry Smith   PetscFunctionBegin;
34629566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
34635f80ce2aSJacob Faibussowitsch   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3464b6490206SBarry Smith 
34654dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
3466b0a32e0cSBarry Smith   B->data   = (void *)b;
3467aea10558SJacob Faibussowitsch   B->ops[0] = MatOps_Values;
346826fbe8dcSKarl Rupp 
3469f4259b30SLisandro Dalcin   b->row          = NULL;
3470f4259b30SLisandro Dalcin   b->col          = NULL;
3471f4259b30SLisandro Dalcin   b->icol         = NULL;
34722593348eSBarry Smith   b->reallocs     = 0;
3473f4259b30SLisandro Dalcin   b->saved_values = NULL;
34742593348eSBarry Smith 
3475c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
34762593348eSBarry Smith   b->nonew              = 0;
3477f4259b30SLisandro Dalcin   b->diag               = NULL;
3478f4259b30SLisandro Dalcin   B->spptr              = NULL;
3479b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3480a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
34814e220ebcSLois Curfman McInnes 
34829566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
34839566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
34849566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
34859566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
34869566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
34879566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
34889566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
34899566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
34909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
34919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
34927ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
34939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
34947ea3e4caSstefano_zampini #endif
34959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
34969566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
34973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
34982593348eSBarry Smith }
34992593348eSBarry Smith 
3500d6acfc2dSPierre Jolivet PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3501d71ae5a4SJacob Faibussowitsch {
3502b24902e0SBarry Smith   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3503a96a251dSBarry Smith   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3504de6a44a3SBarry Smith 
35053a40ed3dSBarry Smith   PetscFunctionBegin;
350631fe6a7dSBarry Smith   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
35075f80ce2aSJacob Faibussowitsch   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
35082593348eSBarry Smith 
35094fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35104fd072dbSBarry Smith     c->imax           = a->imax;
35114fd072dbSBarry Smith     c->ilen           = a->ilen;
35124fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
35134fd072dbSBarry Smith   } else {
35149566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3515b6490206SBarry Smith     for (i = 0; i < mbs; i++) {
35162593348eSBarry Smith       c->imax[i] = a->imax[i];
35172593348eSBarry Smith       c->ilen[i] = a->ilen[i];
35182593348eSBarry Smith     }
35194fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
35204fd072dbSBarry Smith   }
35212593348eSBarry Smith 
35222593348eSBarry Smith   /* allocate the matrix space */
352316a2bf60SHong Zhang   if (mallocmatspace) {
35244fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35259f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
35269f0612e4SBarry Smith       PetscCall(PetscArrayzero(c->a, bs2 * nz));
35279f0612e4SBarry Smith       c->free_a       = PETSC_TRUE;
35284fd072dbSBarry Smith       c->i            = a->i;
35294fd072dbSBarry Smith       c->j            = a->j;
3530379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
35314fd072dbSBarry Smith       c->parent       = A;
35321e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
35331e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
353426fbe8dcSKarl Rupp 
35359566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)A));
35369566063dSJacob Faibussowitsch       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35379566063dSJacob Faibussowitsch       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35384fd072dbSBarry Smith     } else {
35399f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
35409f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
35419f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3542379be0ddSLisandro Dalcin       c->free_a  = PETSC_TRUE;
35434fd072dbSBarry Smith       c->free_ij = PETSC_TRUE;
354426fbe8dcSKarl Rupp 
35459566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3546b6490206SBarry Smith       if (mbs > 0) {
35479566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(c->j, a->j, nz));
35482e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
35499566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
35502e8a6d31SBarry Smith         } else {
35519566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(c->a, bs2 * nz));
35522593348eSBarry Smith         }
35532593348eSBarry Smith       }
35541e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
35551e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
355616a2bf60SHong Zhang     }
35574fd072dbSBarry Smith   }
355816a2bf60SHong Zhang 
35592593348eSBarry Smith   c->roworiented = a->roworiented;
35602593348eSBarry Smith   c->nonew       = a->nonew;
356126fbe8dcSKarl Rupp 
35629566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
35639566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
356426fbe8dcSKarl Rupp 
35655c9eb25fSBarry Smith   c->bs2        = a->bs2;
35665c9eb25fSBarry Smith   c->mbs        = a->mbs;
35675c9eb25fSBarry Smith   c->nbs        = a->nbs;
35682593348eSBarry Smith   c->nz         = a->nz;
3569f2cbd3d5SJed Brown   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3570f361c04dSBarry Smith   c->solve_work = NULL;
3571f361c04dSBarry Smith   c->mult_work  = NULL;
3572f361c04dSBarry Smith   c->sor_workt  = NULL;
3573f361c04dSBarry Smith   c->sor_work   = NULL;
357488e51ccdSHong Zhang 
357588e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
357688e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3577cd6b891eSBarry Smith   if (a->compressedrow.use) {
357888e51ccdSHong Zhang     i = a->compressedrow.nrows;
35799566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
35809566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
35819566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
358288e51ccdSHong Zhang   } else {
358388e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
35840298fd71SBarry Smith     c->compressedrow.i      = NULL;
35850298fd71SBarry Smith     c->compressedrow.rindex = NULL;
358688e51ccdSHong Zhang   }
3587c05f355bSMark Adams   c->nonzerorowcnt = a->nonzerorowcnt;
3588e56f5c9eSBarry Smith   C->nonzerostate  = A->nonzerostate;
358926fbe8dcSKarl Rupp 
35909566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
35913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
35922593348eSBarry Smith }
35932593348eSBarry Smith 
3594d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3595d71ae5a4SJacob Faibussowitsch {
3596b24902e0SBarry Smith   PetscFunctionBegin;
35979566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
35989566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
35999566063dSJacob Faibussowitsch   PetscCall(MatSetType(*B, MATSEQBAIJ));
36009566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
36013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3602b24902e0SBarry Smith }
3603b24902e0SBarry Smith 
3604618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3605d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3606d71ae5a4SJacob Faibussowitsch {
3607b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3608b51a4376SLisandro Dalcin   PetscInt    *rowidxs, *colidxs;
3609b51a4376SLisandro Dalcin   PetscScalar *matvals;
3610b51a4376SLisandro Dalcin 
3611b51a4376SLisandro Dalcin   PetscFunctionBegin;
36129566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
3613b51a4376SLisandro Dalcin 
3614b51a4376SLisandro Dalcin   /* read matrix header */
36159566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
36165f80ce2aSJacob Faibussowitsch   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
36179371c9d4SSatish Balay   M  = header[1];
36189371c9d4SSatish Balay   N  = header[2];
36199371c9d4SSatish Balay   nz = header[3];
36205f80ce2aSJacob Faibussowitsch   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
36215f80ce2aSJacob Faibussowitsch   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
36225f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3623b51a4376SLisandro Dalcin 
3624b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
36259566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3626b51a4376SLisandro Dalcin   /* set local and global sizes if not set already */
3627b51a4376SLisandro Dalcin   if (mat->rmap->n < 0) mat->rmap->n = M;
3628b51a4376SLisandro Dalcin   if (mat->cmap->n < 0) mat->cmap->n = N;
3629b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3630b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
36319566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
36329566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
3633b51a4376SLisandro Dalcin 
3634b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
36359566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &rows, &cols));
36365f80ce2aSJacob Faibussowitsch   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
36379566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
36389566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
36399371c9d4SSatish Balay   mbs = m / bs;
36409371c9d4SSatish Balay   nbs = n / bs;
3641b51a4376SLisandro Dalcin 
3642b51a4376SLisandro Dalcin   /* read in row lengths, column indices and nonzero values */
36439566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &rowidxs));
36449566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
36459371c9d4SSatish Balay   rowidxs[0] = 0;
36469371c9d4SSatish Balay   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3647b51a4376SLisandro Dalcin   sum = rowidxs[m];
36485f80ce2aSJacob Faibussowitsch   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3649b51a4376SLisandro Dalcin 
3650b51a4376SLisandro Dalcin   /* read in column indices and nonzero values */
36519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
36529566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
36539566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3654b51a4376SLisandro Dalcin 
3655b51a4376SLisandro Dalcin   {               /* preallocate matrix storage */
3656b51a4376SLisandro Dalcin     PetscBT   bt; /* helper bit set to count nonzeros */
3657b51a4376SLisandro Dalcin     PetscInt *nnz;
3658618cc2edSLisandro Dalcin     PetscBool sbaij;
3659b51a4376SLisandro Dalcin 
36609566063dSJacob Faibussowitsch     PetscCall(PetscBTCreate(nbs, &bt));
36619566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(mbs, &nnz));
36629566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3663b51a4376SLisandro Dalcin     for (i = 0; i < mbs; i++) {
36649566063dSJacob Faibussowitsch       PetscCall(PetscBTMemzero(nbs, bt));
3665618cc2edSLisandro Dalcin       for (k = 0; k < bs; k++) {
3666618cc2edSLisandro Dalcin         PetscInt row = bs * i + k;
3667618cc2edSLisandro Dalcin         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3668618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3669618cc2edSLisandro Dalcin           if (!sbaij || col >= row)
3670618cc2edSLisandro Dalcin             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3671618cc2edSLisandro Dalcin         }
3672618cc2edSLisandro Dalcin       }
3673b51a4376SLisandro Dalcin     }
36749566063dSJacob Faibussowitsch     PetscCall(PetscBTDestroy(&bt));
36759566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
36769566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
36779566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
3678b51a4376SLisandro Dalcin   }
3679b51a4376SLisandro Dalcin 
3680b51a4376SLisandro Dalcin   /* store matrix values */
3681b51a4376SLisandro Dalcin   for (i = 0; i < m; i++) {
3682b51a4376SLisandro Dalcin     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
36839927e4dfSBarry Smith     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3684b51a4376SLisandro Dalcin   }
3685b51a4376SLisandro Dalcin 
36869566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
36879566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs, matvals));
36889566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
36899566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
36903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3691b51a4376SLisandro Dalcin }
3692b51a4376SLisandro Dalcin 
3693d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3694d71ae5a4SJacob Faibussowitsch {
36957f489da9SVaclav Hapla   PetscBool isbinary;
3696f501eaabSShri Abhyankar 
3697f501eaabSShri Abhyankar   PetscFunctionBegin;
36989566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
36995f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
37009566063dSJacob Faibussowitsch   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
37013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3702f501eaabSShri Abhyankar }
3703f501eaabSShri Abhyankar 
37045d83a8b1SBarry Smith /*@
370511a5261eSBarry Smith   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3706273d9f13SBarry Smith   compressed row) format.  For good matrix assembly performance the
370720f4b53cSBarry Smith   user should preallocate the matrix storage by setting the parameter `nz`
370820f4b53cSBarry Smith   (or the array `nnz`).
37092593348eSBarry Smith 
3710d083f849SBarry Smith   Collective
3711273d9f13SBarry Smith 
3712273d9f13SBarry Smith   Input Parameters:
371311a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
371411a5261eSBarry Smith . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
371511a5261eSBarry Smith          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3716273d9f13SBarry Smith . m    - number of rows
3717273d9f13SBarry Smith . n    - number of columns
371835d8aa7fSBarry Smith . nz   - number of nonzero blocks  per block row (same for all rows)
371935d8aa7fSBarry Smith - nnz  - array containing the number of nonzero blocks in the various block rows
372020f4b53cSBarry Smith          (possibly different for each block row) or `NULL`
3721273d9f13SBarry Smith 
3722273d9f13SBarry Smith   Output Parameter:
3723273d9f13SBarry Smith . A - the matrix
3724273d9f13SBarry Smith 
3725273d9f13SBarry Smith   Options Database Keys:
372611a5261eSBarry Smith + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3727a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
3728273d9f13SBarry Smith 
3729273d9f13SBarry Smith   Level: intermediate
3730273d9f13SBarry Smith 
3731273d9f13SBarry Smith   Notes:
373277433607SBarry Smith   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
37332ef1f0ffSBarry Smith   MatXXXXSetPreallocation() paradigm instead of this routine directly.
37342ef1f0ffSBarry Smith   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
37352ef1f0ffSBarry Smith 
3736d1be2dadSMatthew Knepley   The number of rows and columns must be divisible by blocksize.
3737d1be2dadSMatthew Knepley 
37382ef1f0ffSBarry Smith   If the `nnz` parameter is given then the `nz` parameter is ignored
373949a6f317SBarry Smith 
374035d8aa7fSBarry Smith   A nonzero block is any block that as 1 or more nonzeros in it
374135d8aa7fSBarry Smith 
37422ef1f0ffSBarry Smith   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3743273d9f13SBarry Smith   storage.  That is, the stored row and column indices can begin at
374420f4b53cSBarry Smith   either one (as in Fortran) or zero.
3745273d9f13SBarry Smith 
37462ef1f0ffSBarry Smith   Specify the preallocated storage with either `nz` or `nnz` (not both).
37472ef1f0ffSBarry Smith   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3748651615e1SBarry Smith   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3749273d9f13SBarry Smith   matrices.
3750273d9f13SBarry Smith 
37511cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3752273d9f13SBarry Smith @*/
3753d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3754d71ae5a4SJacob Faibussowitsch {
3755273d9f13SBarry Smith   PetscFunctionBegin;
37569566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
37579566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
37589566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJ));
37599566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
37603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3761273d9f13SBarry Smith }
3762273d9f13SBarry Smith 
37635d83a8b1SBarry Smith /*@
3764273d9f13SBarry Smith   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3765273d9f13SBarry Smith   per row in the matrix. For good matrix assembly performance the
376620f4b53cSBarry Smith   user should preallocate the matrix storage by setting the parameter `nz`
376720f4b53cSBarry Smith   (or the array `nnz`).
3768273d9f13SBarry Smith 
3769d083f849SBarry Smith   Collective
3770273d9f13SBarry Smith 
3771273d9f13SBarry Smith   Input Parameters:
37721c4f3114SJed Brown + B   - the matrix
377311a5261eSBarry Smith . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
377411a5261eSBarry Smith         blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3775273d9f13SBarry Smith . nz  - number of block nonzeros per block row (same for all rows)
3776273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows
37772ef1f0ffSBarry Smith         (possibly different for each block row) or `NULL`
3778273d9f13SBarry Smith 
3779273d9f13SBarry Smith   Options Database Keys:
378011a5261eSBarry Smith + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3781a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
3782273d9f13SBarry Smith 
3783273d9f13SBarry Smith   Level: intermediate
3784273d9f13SBarry Smith 
3785273d9f13SBarry Smith   Notes:
37862ef1f0ffSBarry Smith   If the `nnz` parameter is given then the `nz` parameter is ignored
378749a6f317SBarry Smith 
378811a5261eSBarry Smith   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3789aa95bbe8SBarry Smith   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
379020f4b53cSBarry Smith   You can also run with the option `-info` and look for messages with the string
3791aa95bbe8SBarry Smith   malloc in them to see if additional memory allocation was needed.
3792aa95bbe8SBarry Smith 
37932ef1f0ffSBarry Smith   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3794273d9f13SBarry Smith   storage.  That is, the stored row and column indices can begin at
379520f4b53cSBarry Smith   either one (as in Fortran) or zero.
3796273d9f13SBarry Smith 
3797d8a51d2aSBarry Smith   Specify the preallocated storage with either `nz` or `nnz` (not both).
37982ef1f0ffSBarry Smith   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3799651615e1SBarry Smith   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3800273d9f13SBarry Smith 
38011cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3802273d9f13SBarry Smith @*/
3803d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3804d71ae5a4SJacob Faibussowitsch {
3805273d9f13SBarry Smith   PetscFunctionBegin;
38066ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
38076ba663aaSJed Brown   PetscValidType(B, 1);
38086ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3809cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
38103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3811273d9f13SBarry Smith }
3812a1d92eedSBarry Smith 
3813725b52f3SLisandro Dalcin /*@C
381411a5261eSBarry Smith   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3815725b52f3SLisandro Dalcin 
3816d083f849SBarry Smith   Collective
3817725b52f3SLisandro Dalcin 
3818725b52f3SLisandro Dalcin   Input Parameters:
38191c4f3114SJed Brown + B  - the matrix
382020f4b53cSBarry Smith . bs - the blocksize
3821d8a51d2aSBarry Smith . i  - the indices into `j` for the start of each local row (indices start with zero)
3822d8a51d2aSBarry Smith . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
3823d8a51d2aSBarry Smith - v  - optional values in the matrix, use `NULL` if not provided
3824725b52f3SLisandro Dalcin 
3825664954b6SBarry Smith   Level: advanced
3826725b52f3SLisandro Dalcin 
38273adadaf3SJed Brown   Notes:
3828d8a51d2aSBarry Smith   The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3829d8a51d2aSBarry Smith 
383011a5261eSBarry Smith   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
383111a5261eSBarry Smith   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
38323adadaf3SJed Brown   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
383311a5261eSBarry Smith   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
38343adadaf3SJed Brown   block column and the second index is over columns within a block.
38353adadaf3SJed Brown 
3836664954b6SBarry Smith   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3837664954b6SBarry Smith 
38381cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3839725b52f3SLisandro Dalcin @*/
3840d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3841d71ae5a4SJacob Faibussowitsch {
3842725b52f3SLisandro Dalcin   PetscFunctionBegin;
38436ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
38446ba663aaSJed Brown   PetscValidType(B, 1);
38456ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3846cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
38473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3848725b52f3SLisandro Dalcin }
3849725b52f3SLisandro Dalcin 
3850c75a6043SHong Zhang /*@
385111a5261eSBarry Smith   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3852c75a6043SHong Zhang 
3853d083f849SBarry Smith   Collective
3854c75a6043SHong Zhang 
3855c75a6043SHong Zhang   Input Parameters:
3856c75a6043SHong Zhang + comm - must be an MPI communicator of size 1
3857c75a6043SHong Zhang . bs   - size of block
3858c75a6043SHong Zhang . m    - number of rows
3859c75a6043SHong Zhang . n    - number of columns
3860483a2f95SBarry Smith . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3861c75a6043SHong Zhang . j    - column indices
3862c75a6043SHong Zhang - a    - matrix values
3863c75a6043SHong Zhang 
3864c75a6043SHong Zhang   Output Parameter:
3865c75a6043SHong Zhang . mat - the matrix
3866c75a6043SHong Zhang 
3867dfb205c3SBarry Smith   Level: advanced
3868c75a6043SHong Zhang 
3869c75a6043SHong Zhang   Notes:
38702ef1f0ffSBarry Smith   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3871c75a6043SHong Zhang   once the matrix is destroyed
3872c75a6043SHong Zhang 
3873c75a6043SHong Zhang   You cannot set new nonzero locations into this matrix, that will generate an error.
3874c75a6043SHong Zhang 
38752ef1f0ffSBarry Smith   The `i` and `j` indices are 0 based
3876c75a6043SHong Zhang 
387711a5261eSBarry Smith   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3878dfb205c3SBarry Smith 
38793adadaf3SJed Brown   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
38803adadaf3SJed Brown   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
38813adadaf3SJed Brown   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
38823adadaf3SJed Brown   with column-major ordering within blocks.
3883dfb205c3SBarry Smith 
38841cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3885c75a6043SHong Zhang @*/
3886d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3887d71ae5a4SJacob Faibussowitsch {
3888c75a6043SHong Zhang   Mat_SeqBAIJ *baij;
3889c75a6043SHong Zhang 
3890c75a6043SHong Zhang   PetscFunctionBegin;
38915f80ce2aSJacob Faibussowitsch   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
38925f80ce2aSJacob Faibussowitsch   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3893c75a6043SHong Zhang 
38949566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
38959566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, m, n));
38969566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATSEQBAIJ));
38979566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3898c75a6043SHong Zhang   baij = (Mat_SeqBAIJ *)(*mat)->data;
38999566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3900c75a6043SHong Zhang 
3901c75a6043SHong Zhang   baij->i = i;
3902c75a6043SHong Zhang   baij->j = j;
3903c75a6043SHong Zhang   baij->a = a;
390426fbe8dcSKarl Rupp 
3905c75a6043SHong Zhang   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3906e6b907acSBarry Smith   baij->free_a         = PETSC_FALSE;
3907e6b907acSBarry Smith   baij->free_ij        = PETSC_FALSE;
3908ceb5bf51SJacob Faibussowitsch   baij->free_imax_ilen = PETSC_TRUE;
3909c75a6043SHong Zhang 
3910ceb5bf51SJacob Faibussowitsch   for (PetscInt ii = 0; ii < m; ii++) {
3911ceb5bf51SJacob Faibussowitsch     const PetscInt row_len = i[ii + 1] - i[ii];
3912ceb5bf51SJacob Faibussowitsch 
3913ceb5bf51SJacob Faibussowitsch     baij->ilen[ii] = baij->imax[ii] = row_len;
3914ceb5bf51SJacob Faibussowitsch     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3915c75a6043SHong Zhang   }
391676bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
3917ceb5bf51SJacob Faibussowitsch     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
39186bdcaf15SBarry Smith       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
39196bdcaf15SBarry Smith       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3920c75a6043SHong Zhang     }
392176bd3646SJed Brown   }
3922c75a6043SHong Zhang 
39239566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
39249566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
39253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3926c75a6043SHong Zhang }
3927bdf6f3fcSHong Zhang 
3928d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3929d71ae5a4SJacob Faibussowitsch {
3930bdf6f3fcSHong Zhang   PetscFunctionBegin;
39319566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
39323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3933bdf6f3fcSHong Zhang }
3934