xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 28636b0c633bd14cfd465c7aa185d082b14b914a)
/*
    Defines the basic matrix operations for the BAIJ (block compressed row)
  matrix storage format.
*/
5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
6c6db04a5SJed Brown #include <petscblaslapack.h>
7af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
943516a2dSKris Buschelman 
1026cec326SBarry Smith /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
1126cec326SBarry Smith #define TYPE BAIJ
1226cec326SBarry Smith #define TYPE_BS
1326cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
1426cec326SBarry Smith #undef TYPE_BS
1526cec326SBarry Smith #define TYPE_BS _BS
1626cec326SBarry Smith #define TYPE_BS_ON
1726cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
1826cec326SBarry Smith #undef TYPE_BS
1926cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmat.h"
2026cec326SBarry Smith #undef TYPE
2126cec326SBarry Smith #undef TYPE_BS_ON
2226cec326SBarry Smith 
237ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
247ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
257ea3e4caSstefano_zampini #endif
267ea3e4caSstefano_zampini 
27b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29b5b72c8aSIrina Sokolova #endif
30c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
31b5b72c8aSIrina Sokolova 
32ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
33d71ae5a4SJacob Faibussowitsch {
349463ebdaSPierre Jolivet   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
35ff6a9541SJacob Faibussowitsch   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
369463ebdaSPierre Jolivet   MatScalar   *a_val = a_aij->a;
379463ebdaSPierre Jolivet 
389463ebdaSPierre Jolivet   PetscFunctionBegin;
399566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
40ff6a9541SJacob Faibussowitsch   PetscCall(PetscArrayzero(reductions, n));
419463ebdaSPierre Jolivet   if (type == NORM_2) {
42ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
439463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
449463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
45857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
469463ebdaSPierre Jolivet           a_val++;
479463ebdaSPierre Jolivet         }
489463ebdaSPierre Jolivet       }
499463ebdaSPierre Jolivet     }
509463ebdaSPierre Jolivet   } else if (type == NORM_1) {
51ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
529463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
539463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
54857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
559463ebdaSPierre Jolivet           a_val++;
569463ebdaSPierre Jolivet         }
579463ebdaSPierre Jolivet       }
589463ebdaSPierre Jolivet     }
599463ebdaSPierre Jolivet   } else if (type == NORM_INFINITY) {
60ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
619463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
629463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
636497c311SBarry Smith           PetscInt col    = A->cmap->rstart + a_aij->j[i] * bs + jb;
64857cbf51SRichard Tran Mills           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
659463ebdaSPierre Jolivet           a_val++;
669463ebdaSPierre Jolivet         }
679463ebdaSPierre Jolivet       }
689463ebdaSPierre Jolivet     }
69857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
70ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
71857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
72857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
73857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
74857cbf51SRichard Tran Mills           a_val++;
75857cbf51SRichard Tran Mills         }
76857cbf51SRichard Tran Mills       }
77857cbf51SRichard Tran Mills     }
78857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
80857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
81857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
82857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
83857cbf51SRichard Tran Mills           a_val++;
84857cbf51SRichard Tran Mills         }
85857cbf51SRichard Tran Mills       }
86857cbf51SRichard Tran Mills     }
87857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
889463ebdaSPierre Jolivet   if (type == NORM_2) {
89ff6a9541SJacob Faibussowitsch     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
90857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
91ff6a9541SJacob Faibussowitsch     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
929463ebdaSPierre Jolivet   }
933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
949463ebdaSPierre Jolivet }
959463ebdaSPierre Jolivet 
9666976f2fSJacob Faibussowitsch static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
97d71ae5a4SJacob Faibussowitsch {
98b01c7715SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
99de80f912SBarry Smith   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
1007f0c90edSBarry Smith   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
10162bba022SBarry Smith   PetscReal    shift = 0.0;
1021a9391e3SHong Zhang   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
103b01c7715SBarry Smith 
104b01c7715SBarry Smith   PetscFunctionBegin;
105a455e926SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
106a455e926SHong Zhang 
1079797317bSBarry Smith   if (a->idiagvalid) {
1089797317bSBarry Smith     if (values) *values = a->idiag;
1093ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1109797317bSBarry Smith   }
1119566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
112b01c7715SBarry Smith   diag_offset = a->diag;
1133a7d0413SPierre Jolivet   if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
114b01c7715SBarry Smith   diag = a->idiag;
115bbead8a2SBarry Smith   if (values) *values = a->idiag;
116b01c7715SBarry Smith   /* factor and invert each block */
117521d7252SBarry Smith   switch (bs) {
118ab040260SJed Brown   case 1:
119ab040260SJed Brown     for (i = 0; i < mbs; i++) {
120ab040260SJed Brown       odiag   = v + 1 * diag_offset[i];
121ab040260SJed Brown       diag[0] = odiag[0];
122ec1892c8SHong Zhang 
123ec1892c8SHong Zhang       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
124966bd95aSPierre Jolivet         PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
1257b6c816cSBarry Smith         A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1267b6c816cSBarry Smith         A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
1277b6c816cSBarry Smith         A->factorerror_zeropivot_row   = i;
1289566063dSJacob Faibussowitsch         PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
129ec1892c8SHong Zhang       }
130ec1892c8SHong Zhang 
131d4a378daSJed Brown       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
132ab040260SJed Brown       diag += 1;
133ab040260SJed Brown     }
134ab040260SJed Brown     break;
135b01c7715SBarry Smith   case 2:
136b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
137b01c7715SBarry Smith       odiag   = v + 4 * diag_offset[i];
1389371c9d4SSatish Balay       diag[0] = odiag[0];
1399371c9d4SSatish Balay       diag[1] = odiag[1];
1409371c9d4SSatish Balay       diag[2] = odiag[2];
1419371c9d4SSatish Balay       diag[3] = odiag[3];
1429566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1437b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
144b01c7715SBarry Smith       diag += 4;
145b01c7715SBarry Smith     }
146b01c7715SBarry Smith     break;
147b01c7715SBarry Smith   case 3:
148b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
149b01c7715SBarry Smith       odiag   = v + 9 * diag_offset[i];
1509371c9d4SSatish Balay       diag[0] = odiag[0];
1519371c9d4SSatish Balay       diag[1] = odiag[1];
1529371c9d4SSatish Balay       diag[2] = odiag[2];
1539371c9d4SSatish Balay       diag[3] = odiag[3];
1549371c9d4SSatish Balay       diag[4] = odiag[4];
1559371c9d4SSatish Balay       diag[5] = odiag[5];
1569371c9d4SSatish Balay       diag[6] = odiag[6];
1579371c9d4SSatish Balay       diag[7] = odiag[7];
158b01c7715SBarry Smith       diag[8] = odiag[8];
1599566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
1607b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
161b01c7715SBarry Smith       diag += 9;
162b01c7715SBarry Smith     }
163b01c7715SBarry Smith     break;
164b01c7715SBarry Smith   case 4:
165b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
166b01c7715SBarry Smith       odiag = v + 16 * diag_offset[i];
1679566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 16));
1689566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
1697b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
170b01c7715SBarry Smith       diag += 16;
171b01c7715SBarry Smith     }
172b01c7715SBarry Smith     break;
173b01c7715SBarry Smith   case 5:
174b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
175b01c7715SBarry Smith       odiag = v + 25 * diag_offset[i];
1769566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 25));
1779566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
1787b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
179b01c7715SBarry Smith       diag += 25;
180b01c7715SBarry Smith     }
181b01c7715SBarry Smith     break;
182d49b2adcSBarry Smith   case 6:
183d49b2adcSBarry Smith     for (i = 0; i < mbs; i++) {
184d49b2adcSBarry Smith       odiag = v + 36 * diag_offset[i];
1859566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 36));
1869566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
1877b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
188d49b2adcSBarry Smith       diag += 36;
189d49b2adcSBarry Smith     }
190d49b2adcSBarry Smith     break;
191de80f912SBarry Smith   case 7:
192de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
193de80f912SBarry Smith       odiag = v + 49 * diag_offset[i];
1949566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 49));
1959566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
1967b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
197de80f912SBarry Smith       diag += 49;
198de80f912SBarry Smith     }
199de80f912SBarry Smith     break;
200b01c7715SBarry Smith   default:
2019566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
202de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
203de80f912SBarry Smith       odiag = v + bs2 * diag_offset[i];
2049566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, bs2));
2059566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
2067b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
207de80f912SBarry Smith       diag += bs2;
208de80f912SBarry Smith     }
2099566063dSJacob Faibussowitsch     PetscCall(PetscFree2(v_work, v_pivots));
210b01c7715SBarry Smith   }
211b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
2123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
213b01c7715SBarry Smith }
214b01c7715SBarry Smith 
21566976f2fSJacob Faibussowitsch static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
216d71ae5a4SJacob Faibussowitsch {
2176d3beeddSMatthew Knepley   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
218e48d15efSToby Isaac   PetscScalar       *x, *work, *w, *workt, *t;
219e48d15efSToby Isaac   const MatScalar   *v, *aa = a->a, *idiag;
220e48d15efSToby Isaac   const PetscScalar *b, *xb;
2215455b99fSToby Isaac   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
222e48d15efSToby Isaac   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
223c1ac3661SBarry Smith   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
224b01c7715SBarry Smith 
225b01c7715SBarry Smith   PetscFunctionBegin;
226b01c7715SBarry Smith   its = its * lits;
2275f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
2285f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2295f80ce2aSJacob Faibussowitsch   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
2305f80ce2aSJacob Faibussowitsch   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
2315f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
232b01c7715SBarry Smith 
2339566063dSJacob Faibussowitsch   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
234b01c7715SBarry Smith 
2353ba16761SJacob Faibussowitsch   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
236b01c7715SBarry Smith   diag  = a->diag;
237b01c7715SBarry Smith   idiag = a->idiag;
238de80f912SBarry Smith   k     = PetscMax(A->rmap->n, A->cmap->n);
23948a46eb9SPierre Jolivet   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
24048a46eb9SPierre Jolivet   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
24148a46eb9SPierre Jolivet   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
2423475c22fSBarry Smith   work = a->mult_work;
2433475c22fSBarry Smith   t    = a->sor_workt;
244de80f912SBarry Smith   w    = a->sor_work;
245de80f912SBarry Smith 
2469566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
2479566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
248de80f912SBarry Smith 
249de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
250de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
251e48d15efSToby Isaac       switch (bs) {
252e48d15efSToby Isaac       case 1:
253e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
254e48d15efSToby Isaac         t[0] = b[0];
255e48d15efSToby Isaac         i2   = 1;
256e48d15efSToby Isaac         idiag += 1;
257e48d15efSToby Isaac         for (i = 1; i < m; i++) {
258e48d15efSToby Isaac           v    = aa + ai[i];
259e48d15efSToby Isaac           vi   = aj + ai[i];
260e48d15efSToby Isaac           nz   = diag[i] - ai[i];
261e48d15efSToby Isaac           s[0] = b[i2];
262e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
263e48d15efSToby Isaac             xw[0] = x[vi[j]];
264e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
265e48d15efSToby Isaac           }
266e48d15efSToby Isaac           t[i2] = s[0];
267e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
268e48d15efSToby Isaac           x[i2] = xw[0];
269e48d15efSToby Isaac           idiag += 1;
270e48d15efSToby Isaac           i2 += 1;
271e48d15efSToby Isaac         }
272e48d15efSToby Isaac         break;
273e48d15efSToby Isaac       case 2:
274e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
2759371c9d4SSatish Balay         t[0] = b[0];
2769371c9d4SSatish Balay         t[1] = b[1];
277e48d15efSToby Isaac         i2   = 2;
278e48d15efSToby Isaac         idiag += 4;
279e48d15efSToby Isaac         for (i = 1; i < m; i++) {
280e48d15efSToby Isaac           v    = aa + 4 * ai[i];
281e48d15efSToby Isaac           vi   = aj + ai[i];
282e48d15efSToby Isaac           nz   = diag[i] - ai[i];
2839371c9d4SSatish Balay           s[0] = b[i2];
2849371c9d4SSatish Balay           s[1] = b[i2 + 1];
285e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
286e48d15efSToby Isaac             idx   = 2 * vi[j];
287e48d15efSToby Isaac             it    = 4 * j;
2889371c9d4SSatish Balay             xw[0] = x[idx];
2899371c9d4SSatish Balay             xw[1] = x[1 + idx];
290e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
291e48d15efSToby Isaac           }
2929371c9d4SSatish Balay           t[i2]     = s[0];
2939371c9d4SSatish Balay           t[i2 + 1] = s[1];
294e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
2959371c9d4SSatish Balay           x[i2]     = xw[0];
2969371c9d4SSatish Balay           x[i2 + 1] = xw[1];
297e48d15efSToby Isaac           idiag += 4;
298e48d15efSToby Isaac           i2 += 2;
299e48d15efSToby Isaac         }
300e48d15efSToby Isaac         break;
301e48d15efSToby Isaac       case 3:
302e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
3039371c9d4SSatish Balay         t[0] = b[0];
3049371c9d4SSatish Balay         t[1] = b[1];
3059371c9d4SSatish Balay         t[2] = b[2];
306e48d15efSToby Isaac         i2   = 3;
307e48d15efSToby Isaac         idiag += 9;
308e48d15efSToby Isaac         for (i = 1; i < m; i++) {
309e48d15efSToby Isaac           v    = aa + 9 * ai[i];
310e48d15efSToby Isaac           vi   = aj + ai[i];
311e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3129371c9d4SSatish Balay           s[0] = b[i2];
3139371c9d4SSatish Balay           s[1] = b[i2 + 1];
3149371c9d4SSatish Balay           s[2] = b[i2 + 2];
315e48d15efSToby Isaac           while (nz--) {
316e48d15efSToby Isaac             idx   = 3 * (*vi++);
3179371c9d4SSatish Balay             xw[0] = x[idx];
3189371c9d4SSatish Balay             xw[1] = x[1 + idx];
3199371c9d4SSatish Balay             xw[2] = x[2 + idx];
320e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
321e48d15efSToby Isaac             v += 9;
322e48d15efSToby Isaac           }
3239371c9d4SSatish Balay           t[i2]     = s[0];
3249371c9d4SSatish Balay           t[i2 + 1] = s[1];
3259371c9d4SSatish Balay           t[i2 + 2] = s[2];
326e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
3279371c9d4SSatish Balay           x[i2]     = xw[0];
3289371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3299371c9d4SSatish Balay           x[i2 + 2] = xw[2];
330e48d15efSToby Isaac           idiag += 9;
331e48d15efSToby Isaac           i2 += 3;
332e48d15efSToby Isaac         }
333e48d15efSToby Isaac         break;
334e48d15efSToby Isaac       case 4:
335e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
3369371c9d4SSatish Balay         t[0] = b[0];
3379371c9d4SSatish Balay         t[1] = b[1];
3389371c9d4SSatish Balay         t[2] = b[2];
3399371c9d4SSatish Balay         t[3] = b[3];
340e48d15efSToby Isaac         i2   = 4;
341e48d15efSToby Isaac         idiag += 16;
342e48d15efSToby Isaac         for (i = 1; i < m; i++) {
343e48d15efSToby Isaac           v    = aa + 16 * ai[i];
344e48d15efSToby Isaac           vi   = aj + ai[i];
345e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3469371c9d4SSatish Balay           s[0] = b[i2];
3479371c9d4SSatish Balay           s[1] = b[i2 + 1];
3489371c9d4SSatish Balay           s[2] = b[i2 + 2];
3499371c9d4SSatish Balay           s[3] = b[i2 + 3];
350e48d15efSToby Isaac           while (nz--) {
351e48d15efSToby Isaac             idx   = 4 * (*vi++);
3529371c9d4SSatish Balay             xw[0] = x[idx];
3539371c9d4SSatish Balay             xw[1] = x[1 + idx];
3549371c9d4SSatish Balay             xw[2] = x[2 + idx];
3559371c9d4SSatish Balay             xw[3] = x[3 + idx];
356e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
357e48d15efSToby Isaac             v += 16;
358e48d15efSToby Isaac           }
3599371c9d4SSatish Balay           t[i2]     = s[0];
3609371c9d4SSatish Balay           t[i2 + 1] = s[1];
3619371c9d4SSatish Balay           t[i2 + 2] = s[2];
3629371c9d4SSatish Balay           t[i2 + 3] = s[3];
363e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
3649371c9d4SSatish Balay           x[i2]     = xw[0];
3659371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3669371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3679371c9d4SSatish Balay           x[i2 + 3] = xw[3];
368e48d15efSToby Isaac           idiag += 16;
369e48d15efSToby Isaac           i2 += 4;
370e48d15efSToby Isaac         }
371e48d15efSToby Isaac         break;
372e48d15efSToby Isaac       case 5:
373e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
3749371c9d4SSatish Balay         t[0] = b[0];
3759371c9d4SSatish Balay         t[1] = b[1];
3769371c9d4SSatish Balay         t[2] = b[2];
3779371c9d4SSatish Balay         t[3] = b[3];
3789371c9d4SSatish Balay         t[4] = b[4];
379e48d15efSToby Isaac         i2   = 5;
380e48d15efSToby Isaac         idiag += 25;
381e48d15efSToby Isaac         for (i = 1; i < m; i++) {
382e48d15efSToby Isaac           v    = aa + 25 * ai[i];
383e48d15efSToby Isaac           vi   = aj + ai[i];
384e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3859371c9d4SSatish Balay           s[0] = b[i2];
3869371c9d4SSatish Balay           s[1] = b[i2 + 1];
3879371c9d4SSatish Balay           s[2] = b[i2 + 2];
3889371c9d4SSatish Balay           s[3] = b[i2 + 3];
3899371c9d4SSatish Balay           s[4] = b[i2 + 4];
390e48d15efSToby Isaac           while (nz--) {
391e48d15efSToby Isaac             idx   = 5 * (*vi++);
3929371c9d4SSatish Balay             xw[0] = x[idx];
3939371c9d4SSatish Balay             xw[1] = x[1 + idx];
3949371c9d4SSatish Balay             xw[2] = x[2 + idx];
3959371c9d4SSatish Balay             xw[3] = x[3 + idx];
3969371c9d4SSatish Balay             xw[4] = x[4 + idx];
397e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
398e48d15efSToby Isaac             v += 25;
399e48d15efSToby Isaac           }
4009371c9d4SSatish Balay           t[i2]     = s[0];
4019371c9d4SSatish Balay           t[i2 + 1] = s[1];
4029371c9d4SSatish Balay           t[i2 + 2] = s[2];
4039371c9d4SSatish Balay           t[i2 + 3] = s[3];
4049371c9d4SSatish Balay           t[i2 + 4] = s[4];
405e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
4069371c9d4SSatish Balay           x[i2]     = xw[0];
4079371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4089371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4099371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4109371c9d4SSatish Balay           x[i2 + 4] = xw[4];
411e48d15efSToby Isaac           idiag += 25;
412e48d15efSToby Isaac           i2 += 5;
413e48d15efSToby Isaac         }
414e48d15efSToby Isaac         break;
415e48d15efSToby Isaac       case 6:
416e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
4179371c9d4SSatish Balay         t[0] = b[0];
4189371c9d4SSatish Balay         t[1] = b[1];
4199371c9d4SSatish Balay         t[2] = b[2];
4209371c9d4SSatish Balay         t[3] = b[3];
4219371c9d4SSatish Balay         t[4] = b[4];
4229371c9d4SSatish Balay         t[5] = b[5];
423e48d15efSToby Isaac         i2   = 6;
424e48d15efSToby Isaac         idiag += 36;
425e48d15efSToby Isaac         for (i = 1; i < m; i++) {
426e48d15efSToby Isaac           v    = aa + 36 * ai[i];
427e48d15efSToby Isaac           vi   = aj + ai[i];
428e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4299371c9d4SSatish Balay           s[0] = b[i2];
4309371c9d4SSatish Balay           s[1] = b[i2 + 1];
4319371c9d4SSatish Balay           s[2] = b[i2 + 2];
4329371c9d4SSatish Balay           s[3] = b[i2 + 3];
4339371c9d4SSatish Balay           s[4] = b[i2 + 4];
4349371c9d4SSatish Balay           s[5] = b[i2 + 5];
435e48d15efSToby Isaac           while (nz--) {
436e48d15efSToby Isaac             idx   = 6 * (*vi++);
4379371c9d4SSatish Balay             xw[0] = x[idx];
4389371c9d4SSatish Balay             xw[1] = x[1 + idx];
4399371c9d4SSatish Balay             xw[2] = x[2 + idx];
4409371c9d4SSatish Balay             xw[3] = x[3 + idx];
4419371c9d4SSatish Balay             xw[4] = x[4 + idx];
4429371c9d4SSatish Balay             xw[5] = x[5 + idx];
443e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
444e48d15efSToby Isaac             v += 36;
445e48d15efSToby Isaac           }
4469371c9d4SSatish Balay           t[i2]     = s[0];
4479371c9d4SSatish Balay           t[i2 + 1] = s[1];
4489371c9d4SSatish Balay           t[i2 + 2] = s[2];
4499371c9d4SSatish Balay           t[i2 + 3] = s[3];
4509371c9d4SSatish Balay           t[i2 + 4] = s[4];
4519371c9d4SSatish Balay           t[i2 + 5] = s[5];
452e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
4539371c9d4SSatish Balay           x[i2]     = xw[0];
4549371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4559371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4569371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4579371c9d4SSatish Balay           x[i2 + 4] = xw[4];
4589371c9d4SSatish Balay           x[i2 + 5] = xw[5];
459e48d15efSToby Isaac           idiag += 36;
460e48d15efSToby Isaac           i2 += 6;
461e48d15efSToby Isaac         }
462e48d15efSToby Isaac         break;
463e48d15efSToby Isaac       case 7:
464e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
4659371c9d4SSatish Balay         t[0] = b[0];
4669371c9d4SSatish Balay         t[1] = b[1];
4679371c9d4SSatish Balay         t[2] = b[2];
4689371c9d4SSatish Balay         t[3] = b[3];
4699371c9d4SSatish Balay         t[4] = b[4];
4709371c9d4SSatish Balay         t[5] = b[5];
4719371c9d4SSatish Balay         t[6] = b[6];
472e48d15efSToby Isaac         i2   = 7;
473e48d15efSToby Isaac         idiag += 49;
474e48d15efSToby Isaac         for (i = 1; i < m; i++) {
475e48d15efSToby Isaac           v    = aa + 49 * ai[i];
476e48d15efSToby Isaac           vi   = aj + ai[i];
477e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4789371c9d4SSatish Balay           s[0] = b[i2];
4799371c9d4SSatish Balay           s[1] = b[i2 + 1];
4809371c9d4SSatish Balay           s[2] = b[i2 + 2];
4819371c9d4SSatish Balay           s[3] = b[i2 + 3];
4829371c9d4SSatish Balay           s[4] = b[i2 + 4];
4839371c9d4SSatish Balay           s[5] = b[i2 + 5];
4849371c9d4SSatish Balay           s[6] = b[i2 + 6];
485e48d15efSToby Isaac           while (nz--) {
486e48d15efSToby Isaac             idx   = 7 * (*vi++);
4879371c9d4SSatish Balay             xw[0] = x[idx];
4889371c9d4SSatish Balay             xw[1] = x[1 + idx];
4899371c9d4SSatish Balay             xw[2] = x[2 + idx];
4909371c9d4SSatish Balay             xw[3] = x[3 + idx];
4919371c9d4SSatish Balay             xw[4] = x[4 + idx];
4929371c9d4SSatish Balay             xw[5] = x[5 + idx];
4939371c9d4SSatish Balay             xw[6] = x[6 + idx];
494e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
495e48d15efSToby Isaac             v += 49;
496e48d15efSToby Isaac           }
4979371c9d4SSatish Balay           t[i2]     = s[0];
4989371c9d4SSatish Balay           t[i2 + 1] = s[1];
4999371c9d4SSatish Balay           t[i2 + 2] = s[2];
5009371c9d4SSatish Balay           t[i2 + 3] = s[3];
5019371c9d4SSatish Balay           t[i2 + 4] = s[4];
5029371c9d4SSatish Balay           t[i2 + 5] = s[5];
5039371c9d4SSatish Balay           t[i2 + 6] = s[6];
504e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
5059371c9d4SSatish Balay           x[i2]     = xw[0];
5069371c9d4SSatish Balay           x[i2 + 1] = xw[1];
5079371c9d4SSatish Balay           x[i2 + 2] = xw[2];
5089371c9d4SSatish Balay           x[i2 + 3] = xw[3];
5099371c9d4SSatish Balay           x[i2 + 4] = xw[4];
5109371c9d4SSatish Balay           x[i2 + 5] = xw[5];
5119371c9d4SSatish Balay           x[i2 + 6] = xw[6];
512e48d15efSToby Isaac           idiag += 49;
513e48d15efSToby Isaac           i2 += 7;
514e48d15efSToby Isaac         }
515e48d15efSToby Isaac         break;
516e48d15efSToby Isaac       default:
51796b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
5189566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(t, b, bs));
519de80f912SBarry Smith         i2 = bs;
520de80f912SBarry Smith         idiag += bs2;
521de80f912SBarry Smith         for (i = 1; i < m; i++) {
522de80f912SBarry Smith           v  = aa + bs2 * ai[i];
523de80f912SBarry Smith           vi = aj + ai[i];
524de80f912SBarry Smith           nz = diag[i] - ai[i];
525de80f912SBarry Smith 
5269566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
527de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
528de80f912SBarry Smith           workt = work;
529de80f912SBarry Smith           for (j = 0; j < nz; j++) {
5309566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
531de80f912SBarry Smith             workt += bs;
532de80f912SBarry Smith           }
53396b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
5349566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(t + i2, w, bs));
53596b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
536de80f912SBarry Smith 
537de80f912SBarry Smith           idiag += bs2;
538de80f912SBarry Smith           i2 += bs;
539de80f912SBarry Smith         }
540e48d15efSToby Isaac         break;
541e48d15efSToby Isaac       }
542de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
5439566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
544e48d15efSToby Isaac       xb = t;
5459371c9d4SSatish Balay     } else xb = b;
546de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
547e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
548e48d15efSToby Isaac       i2    = bs * (m - 1);
549e48d15efSToby Isaac       switch (bs) {
550e48d15efSToby Isaac       case 1:
551e48d15efSToby Isaac         s[0] = xb[i2];
552e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
553e48d15efSToby Isaac         x[i2] = xw[0];
554e48d15efSToby Isaac         i2 -= 1;
555e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
556e48d15efSToby Isaac           v    = aa + (diag[i] + 1);
557e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
558e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
559e48d15efSToby Isaac           s[0] = xb[i2];
560e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
561e48d15efSToby Isaac             xw[0] = x[vi[j]];
562e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
563e48d15efSToby Isaac           }
564e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
565e48d15efSToby Isaac           x[i2] = xw[0];
566e48d15efSToby Isaac           idiag -= 1;
567e48d15efSToby Isaac           i2 -= 1;
568e48d15efSToby Isaac         }
569e48d15efSToby Isaac         break;
570e48d15efSToby Isaac       case 2:
5719371c9d4SSatish Balay         s[0] = xb[i2];
5729371c9d4SSatish Balay         s[1] = xb[i2 + 1];
573e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5749371c9d4SSatish Balay         x[i2]     = xw[0];
5759371c9d4SSatish Balay         x[i2 + 1] = xw[1];
576e48d15efSToby Isaac         i2 -= 2;
577e48d15efSToby Isaac         idiag -= 4;
578e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
579e48d15efSToby Isaac           v    = aa + 4 * (diag[i] + 1);
580e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
581e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
5829371c9d4SSatish Balay           s[0] = xb[i2];
5839371c9d4SSatish Balay           s[1] = xb[i2 + 1];
584e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
585e48d15efSToby Isaac             idx   = 2 * vi[j];
586e48d15efSToby Isaac             it    = 4 * j;
5879371c9d4SSatish Balay             xw[0] = x[idx];
5889371c9d4SSatish Balay             xw[1] = x[1 + idx];
589e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
590e48d15efSToby Isaac           }
591e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5929371c9d4SSatish Balay           x[i2]     = xw[0];
5939371c9d4SSatish Balay           x[i2 + 1] = xw[1];
594e48d15efSToby Isaac           idiag -= 4;
595e48d15efSToby Isaac           i2 -= 2;
596e48d15efSToby Isaac         }
597e48d15efSToby Isaac         break;
598e48d15efSToby Isaac       case 3:
5999371c9d4SSatish Balay         s[0] = xb[i2];
6009371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6019371c9d4SSatish Balay         s[2] = xb[i2 + 2];
602e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6039371c9d4SSatish Balay         x[i2]     = xw[0];
6049371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6059371c9d4SSatish Balay         x[i2 + 2] = xw[2];
606e48d15efSToby Isaac         i2 -= 3;
607e48d15efSToby Isaac         idiag -= 9;
608e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
609e48d15efSToby Isaac           v    = aa + 9 * (diag[i] + 1);
610e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
611e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6129371c9d4SSatish Balay           s[0] = xb[i2];
6139371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6149371c9d4SSatish Balay           s[2] = xb[i2 + 2];
615e48d15efSToby Isaac           while (nz--) {
616e48d15efSToby Isaac             idx   = 3 * (*vi++);
6179371c9d4SSatish Balay             xw[0] = x[idx];
6189371c9d4SSatish Balay             xw[1] = x[1 + idx];
6199371c9d4SSatish Balay             xw[2] = x[2 + idx];
620e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
621e48d15efSToby Isaac             v += 9;
622e48d15efSToby Isaac           }
623e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6249371c9d4SSatish Balay           x[i2]     = xw[0];
6259371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6269371c9d4SSatish Balay           x[i2 + 2] = xw[2];
627e48d15efSToby Isaac           idiag -= 9;
628e48d15efSToby Isaac           i2 -= 3;
629e48d15efSToby Isaac         }
630e48d15efSToby Isaac         break;
631e48d15efSToby Isaac       case 4:
6329371c9d4SSatish Balay         s[0] = xb[i2];
6339371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6349371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6359371c9d4SSatish Balay         s[3] = xb[i2 + 3];
636e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6379371c9d4SSatish Balay         x[i2]     = xw[0];
6389371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6399371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6409371c9d4SSatish Balay         x[i2 + 3] = xw[3];
641e48d15efSToby Isaac         i2 -= 4;
642e48d15efSToby Isaac         idiag -= 16;
643e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
644e48d15efSToby Isaac           v    = aa + 16 * (diag[i] + 1);
645e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
646e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6479371c9d4SSatish Balay           s[0] = xb[i2];
6489371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6499371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6509371c9d4SSatish Balay           s[3] = xb[i2 + 3];
651e48d15efSToby Isaac           while (nz--) {
652e48d15efSToby Isaac             idx   = 4 * (*vi++);
6539371c9d4SSatish Balay             xw[0] = x[idx];
6549371c9d4SSatish Balay             xw[1] = x[1 + idx];
6559371c9d4SSatish Balay             xw[2] = x[2 + idx];
6569371c9d4SSatish Balay             xw[3] = x[3 + idx];
657e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
658e48d15efSToby Isaac             v += 16;
659e48d15efSToby Isaac           }
660e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6619371c9d4SSatish Balay           x[i2]     = xw[0];
6629371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6639371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6649371c9d4SSatish Balay           x[i2 + 3] = xw[3];
665e48d15efSToby Isaac           idiag -= 16;
666e48d15efSToby Isaac           i2 -= 4;
667e48d15efSToby Isaac         }
668e48d15efSToby Isaac         break;
669e48d15efSToby Isaac       case 5:
6709371c9d4SSatish Balay         s[0] = xb[i2];
6719371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6729371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6739371c9d4SSatish Balay         s[3] = xb[i2 + 3];
6749371c9d4SSatish Balay         s[4] = xb[i2 + 4];
675e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6769371c9d4SSatish Balay         x[i2]     = xw[0];
6779371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6789371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6799371c9d4SSatish Balay         x[i2 + 3] = xw[3];
6809371c9d4SSatish Balay         x[i2 + 4] = xw[4];
681e48d15efSToby Isaac         i2 -= 5;
682e48d15efSToby Isaac         idiag -= 25;
683e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
684e48d15efSToby Isaac           v    = aa + 25 * (diag[i] + 1);
685e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
686e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6879371c9d4SSatish Balay           s[0] = xb[i2];
6889371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6899371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6909371c9d4SSatish Balay           s[3] = xb[i2 + 3];
6919371c9d4SSatish Balay           s[4] = xb[i2 + 4];
692e48d15efSToby Isaac           while (nz--) {
693e48d15efSToby Isaac             idx   = 5 * (*vi++);
6949371c9d4SSatish Balay             xw[0] = x[idx];
6959371c9d4SSatish Balay             xw[1] = x[1 + idx];
6969371c9d4SSatish Balay             xw[2] = x[2 + idx];
6979371c9d4SSatish Balay             xw[3] = x[3 + idx];
6989371c9d4SSatish Balay             xw[4] = x[4 + idx];
699e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
700e48d15efSToby Isaac             v += 25;
701e48d15efSToby Isaac           }
702e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
7039371c9d4SSatish Balay           x[i2]     = xw[0];
7049371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7059371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7069371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7079371c9d4SSatish Balay           x[i2 + 4] = xw[4];
708e48d15efSToby Isaac           idiag -= 25;
709e48d15efSToby Isaac           i2 -= 5;
710e48d15efSToby Isaac         }
711e48d15efSToby Isaac         break;
712e48d15efSToby Isaac       case 6:
7139371c9d4SSatish Balay         s[0] = xb[i2];
7149371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7159371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7169371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7179371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7189371c9d4SSatish Balay         s[5] = xb[i2 + 5];
719e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7209371c9d4SSatish Balay         x[i2]     = xw[0];
7219371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7229371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7239371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7249371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7259371c9d4SSatish Balay         x[i2 + 5] = xw[5];
726e48d15efSToby Isaac         i2 -= 6;
727e48d15efSToby Isaac         idiag -= 36;
728e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
729e48d15efSToby Isaac           v    = aa + 36 * (diag[i] + 1);
730e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
731e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7329371c9d4SSatish Balay           s[0] = xb[i2];
7339371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7349371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7359371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7369371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7379371c9d4SSatish Balay           s[5] = xb[i2 + 5];
738e48d15efSToby Isaac           while (nz--) {
739e48d15efSToby Isaac             idx   = 6 * (*vi++);
7409371c9d4SSatish Balay             xw[0] = x[idx];
7419371c9d4SSatish Balay             xw[1] = x[1 + idx];
7429371c9d4SSatish Balay             xw[2] = x[2 + idx];
7439371c9d4SSatish Balay             xw[3] = x[3 + idx];
7449371c9d4SSatish Balay             xw[4] = x[4 + idx];
7459371c9d4SSatish Balay             xw[5] = x[5 + idx];
746e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
747e48d15efSToby Isaac             v += 36;
748e48d15efSToby Isaac           }
749e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7509371c9d4SSatish Balay           x[i2]     = xw[0];
7519371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7529371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7539371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7549371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7559371c9d4SSatish Balay           x[i2 + 5] = xw[5];
756e48d15efSToby Isaac           idiag -= 36;
757e48d15efSToby Isaac           i2 -= 6;
758e48d15efSToby Isaac         }
759e48d15efSToby Isaac         break;
760e48d15efSToby Isaac       case 7:
7619371c9d4SSatish Balay         s[0] = xb[i2];
7629371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7639371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7649371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7659371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7669371c9d4SSatish Balay         s[5] = xb[i2 + 5];
7679371c9d4SSatish Balay         s[6] = xb[i2 + 6];
768e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
7699371c9d4SSatish Balay         x[i2]     = xw[0];
7709371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7719371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7729371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7739371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7749371c9d4SSatish Balay         x[i2 + 5] = xw[5];
7759371c9d4SSatish Balay         x[i2 + 6] = xw[6];
776e48d15efSToby Isaac         i2 -= 7;
777e48d15efSToby Isaac         idiag -= 49;
778e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
779e48d15efSToby Isaac           v    = aa + 49 * (diag[i] + 1);
780e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
781e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7829371c9d4SSatish Balay           s[0] = xb[i2];
7839371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7849371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7859371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7869371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7879371c9d4SSatish Balay           s[5] = xb[i2 + 5];
7889371c9d4SSatish Balay           s[6] = xb[i2 + 6];
789e48d15efSToby Isaac           while (nz--) {
790e48d15efSToby Isaac             idx   = 7 * (*vi++);
7919371c9d4SSatish Balay             xw[0] = x[idx];
7929371c9d4SSatish Balay             xw[1] = x[1 + idx];
7939371c9d4SSatish Balay             xw[2] = x[2 + idx];
7949371c9d4SSatish Balay             xw[3] = x[3 + idx];
7959371c9d4SSatish Balay             xw[4] = x[4 + idx];
7969371c9d4SSatish Balay             xw[5] = x[5 + idx];
7979371c9d4SSatish Balay             xw[6] = x[6 + idx];
798e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
799e48d15efSToby Isaac             v += 49;
800e48d15efSToby Isaac           }
801e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
8029371c9d4SSatish Balay           x[i2]     = xw[0];
8039371c9d4SSatish Balay           x[i2 + 1] = xw[1];
8049371c9d4SSatish Balay           x[i2 + 2] = xw[2];
8059371c9d4SSatish Balay           x[i2 + 3] = xw[3];
8069371c9d4SSatish Balay           x[i2 + 4] = xw[4];
8079371c9d4SSatish Balay           x[i2 + 5] = xw[5];
8089371c9d4SSatish Balay           x[i2 + 6] = xw[6];
809e48d15efSToby Isaac           idiag -= 49;
810e48d15efSToby Isaac           i2 -= 7;
811e48d15efSToby Isaac         }
812e48d15efSToby Isaac         break;
813e48d15efSToby Isaac       default:
8149566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(w, xb + i2, bs));
81596b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
816de80f912SBarry Smith         i2 -= bs;
817e48d15efSToby Isaac         idiag -= bs2;
818de80f912SBarry Smith         for (i = m - 2; i >= 0; i--) {
819de80f912SBarry Smith           v  = aa + bs2 * (diag[i] + 1);
820de80f912SBarry Smith           vi = aj + diag[i] + 1;
821de80f912SBarry Smith           nz = ai[i + 1] - diag[i] - 1;
822de80f912SBarry Smith 
8239566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, xb + i2, bs));
824de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
825de80f912SBarry Smith           workt = work;
826de80f912SBarry Smith           for (j = 0; j < nz; j++) {
8279566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
828de80f912SBarry Smith             workt += bs;
829de80f912SBarry Smith           }
83096b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
83196b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
832e48d15efSToby Isaac 
833de80f912SBarry Smith           idiag -= bs2;
834de80f912SBarry Smith           i2 -= bs;
835de80f912SBarry Smith         }
836e48d15efSToby Isaac         break;
837e48d15efSToby Isaac       }
8389566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
839de80f912SBarry Smith     }
840e48d15efSToby Isaac     its--;
841e48d15efSToby Isaac   }
842e48d15efSToby Isaac   while (its--) {
843e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
844e48d15efSToby Isaac       idiag = a->idiag;
845e48d15efSToby Isaac       i2    = 0;
846e48d15efSToby Isaac       switch (bs) {
847e48d15efSToby Isaac       case 1:
848e48d15efSToby Isaac         for (i = 0; i < m; i++) {
849e48d15efSToby Isaac           v    = aa + ai[i];
850e48d15efSToby Isaac           vi   = aj + ai[i];
851e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
852e48d15efSToby Isaac           s[0] = b[i2];
853e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
854e48d15efSToby Isaac             xw[0] = x[vi[j]];
855e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
856e48d15efSToby Isaac           }
857e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
858e48d15efSToby Isaac           x[i2] += xw[0];
859e48d15efSToby Isaac           idiag += 1;
860e48d15efSToby Isaac           i2 += 1;
861e48d15efSToby Isaac         }
862e48d15efSToby Isaac         break;
863e48d15efSToby Isaac       case 2:
864e48d15efSToby Isaac         for (i = 0; i < m; i++) {
865e48d15efSToby Isaac           v    = aa + 4 * ai[i];
866e48d15efSToby Isaac           vi   = aj + ai[i];
867e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8689371c9d4SSatish Balay           s[0] = b[i2];
8699371c9d4SSatish Balay           s[1] = b[i2 + 1];
870e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
871e48d15efSToby Isaac             idx   = 2 * vi[j];
872e48d15efSToby Isaac             it    = 4 * j;
8739371c9d4SSatish Balay             xw[0] = x[idx];
8749371c9d4SSatish Balay             xw[1] = x[1 + idx];
875e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
876e48d15efSToby Isaac           }
877e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
8789371c9d4SSatish Balay           x[i2] += xw[0];
8799371c9d4SSatish Balay           x[i2 + 1] += xw[1];
880e48d15efSToby Isaac           idiag += 4;
881e48d15efSToby Isaac           i2 += 2;
882e48d15efSToby Isaac         }
883e48d15efSToby Isaac         break;
884e48d15efSToby Isaac       case 3:
885e48d15efSToby Isaac         for (i = 0; i < m; i++) {
886e48d15efSToby Isaac           v    = aa + 9 * ai[i];
887e48d15efSToby Isaac           vi   = aj + ai[i];
888e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8899371c9d4SSatish Balay           s[0] = b[i2];
8909371c9d4SSatish Balay           s[1] = b[i2 + 1];
8919371c9d4SSatish Balay           s[2] = b[i2 + 2];
892e48d15efSToby Isaac           while (nz--) {
893e48d15efSToby Isaac             idx   = 3 * (*vi++);
8949371c9d4SSatish Balay             xw[0] = x[idx];
8959371c9d4SSatish Balay             xw[1] = x[1 + idx];
8969371c9d4SSatish Balay             xw[2] = x[2 + idx];
897e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
898e48d15efSToby Isaac             v += 9;
899e48d15efSToby Isaac           }
900e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
9019371c9d4SSatish Balay           x[i2] += xw[0];
9029371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9039371c9d4SSatish Balay           x[i2 + 2] += xw[2];
904e48d15efSToby Isaac           idiag += 9;
905e48d15efSToby Isaac           i2 += 3;
906e48d15efSToby Isaac         }
907e48d15efSToby Isaac         break;
908e48d15efSToby Isaac       case 4:
909e48d15efSToby Isaac         for (i = 0; i < m; i++) {
910e48d15efSToby Isaac           v    = aa + 16 * ai[i];
911e48d15efSToby Isaac           vi   = aj + ai[i];
912e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9139371c9d4SSatish Balay           s[0] = b[i2];
9149371c9d4SSatish Balay           s[1] = b[i2 + 1];
9159371c9d4SSatish Balay           s[2] = b[i2 + 2];
9169371c9d4SSatish Balay           s[3] = b[i2 + 3];
917e48d15efSToby Isaac           while (nz--) {
918e48d15efSToby Isaac             idx   = 4 * (*vi++);
9199371c9d4SSatish Balay             xw[0] = x[idx];
9209371c9d4SSatish Balay             xw[1] = x[1 + idx];
9219371c9d4SSatish Balay             xw[2] = x[2 + idx];
9229371c9d4SSatish Balay             xw[3] = x[3 + idx];
923e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
924e48d15efSToby Isaac             v += 16;
925e48d15efSToby Isaac           }
926e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
9279371c9d4SSatish Balay           x[i2] += xw[0];
9289371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9299371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9309371c9d4SSatish Balay           x[i2 + 3] += xw[3];
931e48d15efSToby Isaac           idiag += 16;
932e48d15efSToby Isaac           i2 += 4;
933e48d15efSToby Isaac         }
934e48d15efSToby Isaac         break;
935e48d15efSToby Isaac       case 5:
936e48d15efSToby Isaac         for (i = 0; i < m; i++) {
937e48d15efSToby Isaac           v    = aa + 25 * ai[i];
938e48d15efSToby Isaac           vi   = aj + ai[i];
939e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9409371c9d4SSatish Balay           s[0] = b[i2];
9419371c9d4SSatish Balay           s[1] = b[i2 + 1];
9429371c9d4SSatish Balay           s[2] = b[i2 + 2];
9439371c9d4SSatish Balay           s[3] = b[i2 + 3];
9449371c9d4SSatish Balay           s[4] = b[i2 + 4];
945e48d15efSToby Isaac           while (nz--) {
946e48d15efSToby Isaac             idx   = 5 * (*vi++);
9479371c9d4SSatish Balay             xw[0] = x[idx];
9489371c9d4SSatish Balay             xw[1] = x[1 + idx];
9499371c9d4SSatish Balay             xw[2] = x[2 + idx];
9509371c9d4SSatish Balay             xw[3] = x[3 + idx];
9519371c9d4SSatish Balay             xw[4] = x[4 + idx];
952e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
953e48d15efSToby Isaac             v += 25;
954e48d15efSToby Isaac           }
955e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
9569371c9d4SSatish Balay           x[i2] += xw[0];
9579371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9589371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9599371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9609371c9d4SSatish Balay           x[i2 + 4] += xw[4];
961e48d15efSToby Isaac           idiag += 25;
962e48d15efSToby Isaac           i2 += 5;
963e48d15efSToby Isaac         }
964e48d15efSToby Isaac         break;
965e48d15efSToby Isaac       case 6:
966e48d15efSToby Isaac         for (i = 0; i < m; i++) {
967e48d15efSToby Isaac           v    = aa + 36 * ai[i];
968e48d15efSToby Isaac           vi   = aj + ai[i];
969e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9709371c9d4SSatish Balay           s[0] = b[i2];
9719371c9d4SSatish Balay           s[1] = b[i2 + 1];
9729371c9d4SSatish Balay           s[2] = b[i2 + 2];
9739371c9d4SSatish Balay           s[3] = b[i2 + 3];
9749371c9d4SSatish Balay           s[4] = b[i2 + 4];
9759371c9d4SSatish Balay           s[5] = b[i2 + 5];
976e48d15efSToby Isaac           while (nz--) {
977e48d15efSToby Isaac             idx   = 6 * (*vi++);
9789371c9d4SSatish Balay             xw[0] = x[idx];
9799371c9d4SSatish Balay             xw[1] = x[1 + idx];
9809371c9d4SSatish Balay             xw[2] = x[2 + idx];
9819371c9d4SSatish Balay             xw[3] = x[3 + idx];
9829371c9d4SSatish Balay             xw[4] = x[4 + idx];
9839371c9d4SSatish Balay             xw[5] = x[5 + idx];
984e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
985e48d15efSToby Isaac             v += 36;
986e48d15efSToby Isaac           }
987e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
9889371c9d4SSatish Balay           x[i2] += xw[0];
9899371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9909371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9919371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9929371c9d4SSatish Balay           x[i2 + 4] += xw[4];
9939371c9d4SSatish Balay           x[i2 + 5] += xw[5];
994e48d15efSToby Isaac           idiag += 36;
995e48d15efSToby Isaac           i2 += 6;
996e48d15efSToby Isaac         }
997e48d15efSToby Isaac         break;
998e48d15efSToby Isaac       case 7:
999e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1000e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1001e48d15efSToby Isaac           vi   = aj + ai[i];
1002e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10039371c9d4SSatish Balay           s[0] = b[i2];
10049371c9d4SSatish Balay           s[1] = b[i2 + 1];
10059371c9d4SSatish Balay           s[2] = b[i2 + 2];
10069371c9d4SSatish Balay           s[3] = b[i2 + 3];
10079371c9d4SSatish Balay           s[4] = b[i2 + 4];
10089371c9d4SSatish Balay           s[5] = b[i2 + 5];
10099371c9d4SSatish Balay           s[6] = b[i2 + 6];
1010e48d15efSToby Isaac           while (nz--) {
1011e48d15efSToby Isaac             idx   = 7 * (*vi++);
10129371c9d4SSatish Balay             xw[0] = x[idx];
10139371c9d4SSatish Balay             xw[1] = x[1 + idx];
10149371c9d4SSatish Balay             xw[2] = x[2 + idx];
10159371c9d4SSatish Balay             xw[3] = x[3 + idx];
10169371c9d4SSatish Balay             xw[4] = x[4 + idx];
10179371c9d4SSatish Balay             xw[5] = x[5 + idx];
10189371c9d4SSatish Balay             xw[6] = x[6 + idx];
1019e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1020e48d15efSToby Isaac             v += 49;
1021e48d15efSToby Isaac           }
1022e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
10239371c9d4SSatish Balay           x[i2] += xw[0];
10249371c9d4SSatish Balay           x[i2 + 1] += xw[1];
10259371c9d4SSatish Balay           x[i2 + 2] += xw[2];
10269371c9d4SSatish Balay           x[i2 + 3] += xw[3];
10279371c9d4SSatish Balay           x[i2 + 4] += xw[4];
10289371c9d4SSatish Balay           x[i2 + 5] += xw[5];
10299371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1030e48d15efSToby Isaac           idiag += 49;
1031e48d15efSToby Isaac           i2 += 7;
1032e48d15efSToby Isaac         }
1033e48d15efSToby Isaac         break;
1034e48d15efSToby Isaac       default:
1035e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1036e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1037e48d15efSToby Isaac           vi = aj + ai[i];
1038e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1039e48d15efSToby Isaac 
10409566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1041e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1042e48d15efSToby Isaac           workt = work;
1043e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
10449566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1045e48d15efSToby Isaac             workt += bs;
1046e48d15efSToby Isaac           }
1047e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1048e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1049e48d15efSToby Isaac 
1050e48d15efSToby Isaac           idiag += bs2;
1051e48d15efSToby Isaac           i2 += bs;
1052e48d15efSToby Isaac         }
1053e48d15efSToby Isaac         break;
1054e48d15efSToby Isaac       }
10559566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1056e48d15efSToby Isaac     }
1057e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1058e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
1059e48d15efSToby Isaac       i2    = bs * (m - 1);
1060e48d15efSToby Isaac       switch (bs) {
1061e48d15efSToby Isaac       case 1:
1062e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1063e48d15efSToby Isaac           v    = aa + ai[i];
1064e48d15efSToby Isaac           vi   = aj + ai[i];
1065e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
1066e48d15efSToby Isaac           s[0] = b[i2];
1067e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1068e48d15efSToby Isaac             xw[0] = x[vi[j]];
1069e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1070e48d15efSToby Isaac           }
1071e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1072e48d15efSToby Isaac           x[i2] += xw[0];
1073e48d15efSToby Isaac           idiag -= 1;
1074e48d15efSToby Isaac           i2 -= 1;
1075e48d15efSToby Isaac         }
1076e48d15efSToby Isaac         break;
1077e48d15efSToby Isaac       case 2:
1078e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1079e48d15efSToby Isaac           v    = aa + 4 * ai[i];
1080e48d15efSToby Isaac           vi   = aj + ai[i];
1081e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10829371c9d4SSatish Balay           s[0] = b[i2];
10839371c9d4SSatish Balay           s[1] = b[i2 + 1];
1084e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1085e48d15efSToby Isaac             idx   = 2 * vi[j];
1086e48d15efSToby Isaac             it    = 4 * j;
10879371c9d4SSatish Balay             xw[0] = x[idx];
10889371c9d4SSatish Balay             xw[1] = x[1 + idx];
1089e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1090e48d15efSToby Isaac           }
1091e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
10929371c9d4SSatish Balay           x[i2] += xw[0];
10939371c9d4SSatish Balay           x[i2 + 1] += xw[1];
1094e48d15efSToby Isaac           idiag -= 4;
1095e48d15efSToby Isaac           i2 -= 2;
1096e48d15efSToby Isaac         }
1097e48d15efSToby Isaac         break;
1098e48d15efSToby Isaac       case 3:
1099e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1100e48d15efSToby Isaac           v    = aa + 9 * ai[i];
1101e48d15efSToby Isaac           vi   = aj + ai[i];
1102e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11039371c9d4SSatish Balay           s[0] = b[i2];
11049371c9d4SSatish Balay           s[1] = b[i2 + 1];
11059371c9d4SSatish Balay           s[2] = b[i2 + 2];
1106e48d15efSToby Isaac           while (nz--) {
1107e48d15efSToby Isaac             idx   = 3 * (*vi++);
11089371c9d4SSatish Balay             xw[0] = x[idx];
11099371c9d4SSatish Balay             xw[1] = x[1 + idx];
11109371c9d4SSatish Balay             xw[2] = x[2 + idx];
1111e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1112e48d15efSToby Isaac             v += 9;
1113e48d15efSToby Isaac           }
1114e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
11159371c9d4SSatish Balay           x[i2] += xw[0];
11169371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11179371c9d4SSatish Balay           x[i2 + 2] += xw[2];
1118e48d15efSToby Isaac           idiag -= 9;
1119e48d15efSToby Isaac           i2 -= 3;
1120e48d15efSToby Isaac         }
1121e48d15efSToby Isaac         break;
1122e48d15efSToby Isaac       case 4:
1123e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1124e48d15efSToby Isaac           v    = aa + 16 * ai[i];
1125e48d15efSToby Isaac           vi   = aj + ai[i];
1126e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11279371c9d4SSatish Balay           s[0] = b[i2];
11289371c9d4SSatish Balay           s[1] = b[i2 + 1];
11299371c9d4SSatish Balay           s[2] = b[i2 + 2];
11309371c9d4SSatish Balay           s[3] = b[i2 + 3];
1131e48d15efSToby Isaac           while (nz--) {
1132e48d15efSToby Isaac             idx   = 4 * (*vi++);
11339371c9d4SSatish Balay             xw[0] = x[idx];
11349371c9d4SSatish Balay             xw[1] = x[1 + idx];
11359371c9d4SSatish Balay             xw[2] = x[2 + idx];
11369371c9d4SSatish Balay             xw[3] = x[3 + idx];
1137e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1138e48d15efSToby Isaac             v += 16;
1139e48d15efSToby Isaac           }
1140e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
11419371c9d4SSatish Balay           x[i2] += xw[0];
11429371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11439371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11449371c9d4SSatish Balay           x[i2 + 3] += xw[3];
1145e48d15efSToby Isaac           idiag -= 16;
1146e48d15efSToby Isaac           i2 -= 4;
1147e48d15efSToby Isaac         }
1148e48d15efSToby Isaac         break;
1149e48d15efSToby Isaac       case 5:
1150e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1151e48d15efSToby Isaac           v    = aa + 25 * ai[i];
1152e48d15efSToby Isaac           vi   = aj + ai[i];
1153e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11549371c9d4SSatish Balay           s[0] = b[i2];
11559371c9d4SSatish Balay           s[1] = b[i2 + 1];
11569371c9d4SSatish Balay           s[2] = b[i2 + 2];
11579371c9d4SSatish Balay           s[3] = b[i2 + 3];
11589371c9d4SSatish Balay           s[4] = b[i2 + 4];
1159e48d15efSToby Isaac           while (nz--) {
1160e48d15efSToby Isaac             idx   = 5 * (*vi++);
11619371c9d4SSatish Balay             xw[0] = x[idx];
11629371c9d4SSatish Balay             xw[1] = x[1 + idx];
11639371c9d4SSatish Balay             xw[2] = x[2 + idx];
11649371c9d4SSatish Balay             xw[3] = x[3 + idx];
11659371c9d4SSatish Balay             xw[4] = x[4 + idx];
1166e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1167e48d15efSToby Isaac             v += 25;
1168e48d15efSToby Isaac           }
1169e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
11709371c9d4SSatish Balay           x[i2] += xw[0];
11719371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11729371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11739371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11749371c9d4SSatish Balay           x[i2 + 4] += xw[4];
1175e48d15efSToby Isaac           idiag -= 25;
1176e48d15efSToby Isaac           i2 -= 5;
1177e48d15efSToby Isaac         }
1178e48d15efSToby Isaac         break;
1179e48d15efSToby Isaac       case 6:
1180e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1181e48d15efSToby Isaac           v    = aa + 36 * ai[i];
1182e48d15efSToby Isaac           vi   = aj + ai[i];
1183e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11849371c9d4SSatish Balay           s[0] = b[i2];
11859371c9d4SSatish Balay           s[1] = b[i2 + 1];
11869371c9d4SSatish Balay           s[2] = b[i2 + 2];
11879371c9d4SSatish Balay           s[3] = b[i2 + 3];
11889371c9d4SSatish Balay           s[4] = b[i2 + 4];
11899371c9d4SSatish Balay           s[5] = b[i2 + 5];
1190e48d15efSToby Isaac           while (nz--) {
1191e48d15efSToby Isaac             idx   = 6 * (*vi++);
11929371c9d4SSatish Balay             xw[0] = x[idx];
11939371c9d4SSatish Balay             xw[1] = x[1 + idx];
11949371c9d4SSatish Balay             xw[2] = x[2 + idx];
11959371c9d4SSatish Balay             xw[3] = x[3 + idx];
11969371c9d4SSatish Balay             xw[4] = x[4 + idx];
11979371c9d4SSatish Balay             xw[5] = x[5 + idx];
1198e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1199e48d15efSToby Isaac             v += 36;
1200e48d15efSToby Isaac           }
1201e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
12029371c9d4SSatish Balay           x[i2] += xw[0];
12039371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12049371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12059371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12069371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12079371c9d4SSatish Balay           x[i2 + 5] += xw[5];
1208e48d15efSToby Isaac           idiag -= 36;
1209e48d15efSToby Isaac           i2 -= 6;
1210e48d15efSToby Isaac         }
1211e48d15efSToby Isaac         break;
1212e48d15efSToby Isaac       case 7:
1213e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1214e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1215e48d15efSToby Isaac           vi   = aj + ai[i];
1216e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
12179371c9d4SSatish Balay           s[0] = b[i2];
12189371c9d4SSatish Balay           s[1] = b[i2 + 1];
12199371c9d4SSatish Balay           s[2] = b[i2 + 2];
12209371c9d4SSatish Balay           s[3] = b[i2 + 3];
12219371c9d4SSatish Balay           s[4] = b[i2 + 4];
12229371c9d4SSatish Balay           s[5] = b[i2 + 5];
12239371c9d4SSatish Balay           s[6] = b[i2 + 6];
1224e48d15efSToby Isaac           while (nz--) {
1225e48d15efSToby Isaac             idx   = 7 * (*vi++);
12269371c9d4SSatish Balay             xw[0] = x[idx];
12279371c9d4SSatish Balay             xw[1] = x[1 + idx];
12289371c9d4SSatish Balay             xw[2] = x[2 + idx];
12299371c9d4SSatish Balay             xw[3] = x[3 + idx];
12309371c9d4SSatish Balay             xw[4] = x[4 + idx];
12319371c9d4SSatish Balay             xw[5] = x[5 + idx];
12329371c9d4SSatish Balay             xw[6] = x[6 + idx];
1233e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1234e48d15efSToby Isaac             v += 49;
1235e48d15efSToby Isaac           }
1236e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
12379371c9d4SSatish Balay           x[i2] += xw[0];
12389371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12399371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12409371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12419371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12429371c9d4SSatish Balay           x[i2 + 5] += xw[5];
12439371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1244e48d15efSToby Isaac           idiag -= 49;
1245e48d15efSToby Isaac           i2 -= 7;
1246e48d15efSToby Isaac         }
1247e48d15efSToby Isaac         break;
1248e48d15efSToby Isaac       default:
1249e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1250e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1251e48d15efSToby Isaac           vi = aj + ai[i];
1252e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1253e48d15efSToby Isaac 
12549566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1255e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1256e48d15efSToby Isaac           workt = work;
1257e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
12589566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1259e48d15efSToby Isaac             workt += bs;
1260e48d15efSToby Isaac           }
1261e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1262e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1263e48d15efSToby Isaac 
1264e48d15efSToby Isaac           idiag -= bs2;
1265e48d15efSToby Isaac           i2 -= bs;
1266e48d15efSToby Isaac         }
1267e48d15efSToby Isaac         break;
1268e48d15efSToby Isaac       }
12699566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1270e48d15efSToby Isaac     }
1271e48d15efSToby Isaac   }
12729566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1275de80f912SBarry Smith }
1276de80f912SBarry Smith 
1277af674e45SBarry Smith /*
127881824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1279af674e45SBarry Smith */
1280af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1281af674e45SBarry Smith   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1282af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1283af674e45SBarry Smith   #define matsetvaluesblocked4_ matsetvaluesblocked4
1284af674e45SBarry Smith #endif
1285af674e45SBarry Smith 
1286d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1287d71ae5a4SJacob Faibussowitsch {
1288af674e45SBarry Smith   Mat                A = *AA;
1289af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1290c1ac3661SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1291c1ac3661SBarry Smith   PetscInt          *ai = a->i, *ailen = a->ilen;
129217ec6a02SBarry Smith   PetscInt          *aj = a->j, stepval, lastcol = -1;
1293f15d580aSBarry Smith   const PetscScalar *value = v;
12944bb09213Spetsc   MatScalar         *ap, *aa = a->a, *bap;
1295af674e45SBarry Smith 
1296af674e45SBarry Smith   PetscFunctionBegin;
1297ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1298af674e45SBarry Smith   stepval = (n - 1) * 4;
1299af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
1300af674e45SBarry Smith     row  = im[k];
1301af674e45SBarry Smith     rp   = aj + ai[row];
1302af674e45SBarry Smith     ap   = aa + 16 * ai[row];
1303af674e45SBarry Smith     nrow = ailen[row];
1304af674e45SBarry Smith     low  = 0;
130517ec6a02SBarry Smith     high = nrow;
1306af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
1307af674e45SBarry Smith       col = in[l];
1308db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1309db4deed7SKarl Rupp       else high = nrow;
131017ec6a02SBarry Smith       lastcol = col;
13111e3347e8SBarry Smith       value   = v + k * (stepval + 4 + l) * 4;
1312af674e45SBarry Smith       while (high - low > 7) {
1313af674e45SBarry Smith         t = (low + high) / 2;
1314af674e45SBarry Smith         if (rp[t] > col) high = t;
1315af674e45SBarry Smith         else low = t;
1316af674e45SBarry Smith       }
1317af674e45SBarry Smith       for (i = low; i < high; i++) {
1318af674e45SBarry Smith         if (rp[i] > col) break;
1319af674e45SBarry Smith         if (rp[i] == col) {
1320af674e45SBarry Smith           bap = ap + 16 * i;
1321af674e45SBarry Smith           for (ii = 0; ii < 4; ii++, value += stepval) {
1322ad540459SPierre Jolivet             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1323af674e45SBarry Smith           }
1324af674e45SBarry Smith           goto noinsert2;
1325af674e45SBarry Smith         }
1326af674e45SBarry Smith       }
1327af674e45SBarry Smith       N = nrow++ - 1;
132817ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
1329af674e45SBarry Smith       /* shift up all the later entries in this row */
1330af674e45SBarry Smith       for (ii = N; ii >= i; ii--) {
1331af674e45SBarry Smith         rp[ii + 1] = rp[ii];
13329566063dSJacob Faibussowitsch         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1333af674e45SBarry Smith       }
133448a46eb9SPierre Jolivet       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1335af674e45SBarry Smith       rp[i] = col;
1336af674e45SBarry Smith       bap   = ap + 16 * i;
1337af674e45SBarry Smith       for (ii = 0; ii < 4; ii++, value += stepval) {
1338ad540459SPierre Jolivet         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1339af674e45SBarry Smith       }
1340af674e45SBarry Smith     noinsert2:;
1341af674e45SBarry Smith       low = i;
1342af674e45SBarry Smith     }
1343af674e45SBarry Smith     ailen[row] = nrow;
1344af674e45SBarry Smith   }
1345be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1346af674e45SBarry Smith }
1347af674e45SBarry Smith 
1348af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1349af674e45SBarry Smith   #define matsetvalues4_ MATSETVALUES4
1350af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1351af674e45SBarry Smith   #define matsetvalues4_ matsetvalues4
1352af674e45SBarry Smith #endif
1353af674e45SBarry Smith 
1354d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1355d71ae5a4SJacob Faibussowitsch {
1356af674e45SBarry Smith   Mat          A = *AA;
1357af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1358580bdb30SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1359c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
1360c1ac3661SBarry Smith   PetscInt    *aj = a->j, brow, bcol;
136117ec6a02SBarry Smith   PetscInt     ridx, cidx, lastcol = -1;
1362af674e45SBarry Smith   MatScalar   *ap, value, *aa      = a->a, *bap;
1363af674e45SBarry Smith 
1364af674e45SBarry Smith   PetscFunctionBegin;
1365af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
13669371c9d4SSatish Balay     row  = im[k];
13679371c9d4SSatish Balay     brow = row / 4;
1368af674e45SBarry Smith     rp   = aj + ai[brow];
1369af674e45SBarry Smith     ap   = aa + 16 * ai[brow];
1370af674e45SBarry Smith     nrow = ailen[brow];
1371af674e45SBarry Smith     low  = 0;
137217ec6a02SBarry Smith     high = nrow;
1373af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
13749371c9d4SSatish Balay       col   = in[l];
13759371c9d4SSatish Balay       bcol  = col / 4;
13769371c9d4SSatish Balay       ridx  = row % 4;
13779371c9d4SSatish Balay       cidx  = col % 4;
1378af674e45SBarry Smith       value = v[l + k * n];
1379db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1380db4deed7SKarl Rupp       else high = nrow;
138117ec6a02SBarry Smith       lastcol = col;
1382af674e45SBarry Smith       while (high - low > 7) {
1383af674e45SBarry Smith         t = (low + high) / 2;
1384af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1385af674e45SBarry Smith         else low = t;
1386af674e45SBarry Smith       }
1387af674e45SBarry Smith       for (i = low; i < high; i++) {
1388af674e45SBarry Smith         if (rp[i] > bcol) break;
1389af674e45SBarry Smith         if (rp[i] == bcol) {
1390af674e45SBarry Smith           bap = ap + 16 * i + 4 * cidx + ridx;
1391af674e45SBarry Smith           *bap += value;
1392af674e45SBarry Smith           goto noinsert1;
1393af674e45SBarry Smith         }
1394af674e45SBarry Smith       }
1395af674e45SBarry Smith       N = nrow++ - 1;
139617ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1397af674e45SBarry Smith       /* shift up all the later entries in this row */
13989566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
13999566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
14009566063dSJacob Faibussowitsch       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1401af674e45SBarry Smith       rp[i]                        = bcol;
1402af674e45SBarry Smith       ap[16 * i + 4 * cidx + ridx] = value;
1403af674e45SBarry Smith     noinsert1:;
1404af674e45SBarry Smith       low = i;
1405af674e45SBarry Smith     }
1406af674e45SBarry Smith     ailen[brow] = nrow;
1407af674e45SBarry Smith   }
1408be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1409af674e45SBarry Smith }
1410af674e45SBarry Smith 
1411be5855fcSBarry Smith /*
1412be5855fcSBarry Smith      Checks for missing diagonals
1413be5855fcSBarry Smith */
1414d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1415d71ae5a4SJacob Faibussowitsch {
1416be5855fcSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14177734d3b5SMatthew G. Knepley   PetscInt    *diag, *ii = a->i, i;
1418be5855fcSBarry Smith 
1419be5855fcSBarry Smith   PetscFunctionBegin;
14209566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
14212af78befSBarry Smith   *missing = PETSC_FALSE;
14227734d3b5SMatthew G. Knepley   if (A->rmap->n > 0 && !ii) {
14232efa7f71SHong Zhang     *missing = PETSC_TRUE;
14242efa7f71SHong Zhang     if (d) *d = 0;
14259566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
14262efa7f71SHong Zhang   } else {
142701445905SHong Zhang     PetscInt n;
142801445905SHong Zhang     n    = PetscMin(a->mbs, a->nbs);
1429883fce79SBarry Smith     diag = a->diag;
143001445905SHong Zhang     for (i = 0; i < n; i++) {
14317734d3b5SMatthew G. Knepley       if (diag[i] >= ii[i + 1]) {
14322af78befSBarry Smith         *missing = PETSC_TRUE;
14332af78befSBarry Smith         if (d) *d = i;
14349566063dSJacob Faibussowitsch         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1435358d2f5dSShri Abhyankar         break;
14362efa7f71SHong Zhang       }
1437be5855fcSBarry Smith     }
1438be5855fcSBarry Smith   }
14393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1440be5855fcSBarry Smith }
1441be5855fcSBarry Smith 
1442d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1443d71ae5a4SJacob Faibussowitsch {
1444de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
144509f38230SBarry Smith   PetscInt     i, j, m = a->mbs;
1446de6a44a3SBarry Smith 
14473a40ed3dSBarry Smith   PetscFunctionBegin;
144809f38230SBarry Smith   if (!a->diag) {
14499566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(m, &a->diag));
14504fd072dbSBarry Smith     a->free_diag = PETSC_TRUE;
145109f38230SBarry Smith   }
14527fc0212eSBarry Smith   for (i = 0; i < m; i++) {
145309f38230SBarry Smith     a->diag[i] = a->i[i + 1];
1454de6a44a3SBarry Smith     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1455de6a44a3SBarry Smith       if (a->j[j] == i) {
145609f38230SBarry Smith         a->diag[i] = j;
1457de6a44a3SBarry Smith         break;
1458de6a44a3SBarry Smith       }
1459de6a44a3SBarry Smith     }
1460de6a44a3SBarry Smith   }
14613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1462de6a44a3SBarry Smith }
14632593348eSBarry Smith 
1464d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1465d71ae5a4SJacob Faibussowitsch {
14663b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14671a83f524SJed Brown   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
14681a83f524SJed Brown   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
14693b2fbd54SBarry Smith 
14703a40ed3dSBarry Smith   PetscFunctionBegin;
14713b2fbd54SBarry Smith   *nn = n;
14723ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
14733b2fbd54SBarry Smith   if (symmetric) {
14749566063dSJacob Faibussowitsch     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1475553b3c51SBarry Smith     nz = tia[n];
14763b2fbd54SBarry Smith   } else {
14779371c9d4SSatish Balay     tia = a->i;
14789371c9d4SSatish Balay     tja = a->j;
14793b2fbd54SBarry Smith   }
14803b2fbd54SBarry Smith 
1481ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1482ecc77c7aSBarry Smith     (*nn) *= bs;
14838f7157efSSatish Balay     /* malloc & create the natural set of indices */
14849566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((n + 1) * bs, ia));
14859985e31cSBarry Smith     if (n) {
14862462f5fdSStefano Zampini       (*ia)[0] = oshift;
1487ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
14889985e31cSBarry Smith     }
1489ecc77c7aSBarry Smith 
1490ecc77c7aSBarry Smith     for (i = 1; i < n; i++) {
1491ecc77c7aSBarry Smith       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1492ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
14938f7157efSSatish Balay     }
1494ad540459SPierre Jolivet     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1495ecc77c7aSBarry Smith 
14961a83f524SJed Brown     if (inja) {
14979566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz * bs * bs, ja));
14989985e31cSBarry Smith       cnt = 0;
14999985e31cSBarry Smith       for (i = 0; i < n; i++) {
15009985e31cSBarry Smith         for (j = 0; j < bs; j++) {
15019985e31cSBarry Smith           for (k = tia[i]; k < tia[i + 1]; k++) {
1502ad540459SPierre Jolivet             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
15039985e31cSBarry Smith           }
15049985e31cSBarry Smith         }
15059985e31cSBarry Smith       }
15069985e31cSBarry Smith     }
15079985e31cSBarry Smith 
15088f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
15099566063dSJacob Faibussowitsch       PetscCall(PetscFree(tia));
15109566063dSJacob Faibussowitsch       PetscCall(PetscFree(tja));
15118f7157efSSatish Balay     }
1512f6d58c54SBarry Smith   } else if (oshift == 1) {
1513715a17b5SBarry Smith     if (symmetric) {
1514a2ea699eSBarry Smith       nz = tia[A->rmap->n / bs];
1515715a17b5SBarry Smith       /*  add 1 to i and j indices */
1516715a17b5SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1517715a17b5SBarry Smith       *ia = tia;
1518715a17b5SBarry Smith       if (ja) {
1519715a17b5SBarry Smith         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1520715a17b5SBarry Smith         *ja = tja;
1521715a17b5SBarry Smith       }
1522715a17b5SBarry Smith     } else {
1523a2ea699eSBarry Smith       nz = a->i[A->rmap->n / bs];
1524f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
15259566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1526f6d58c54SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1527f6d58c54SBarry Smith       if (ja) {
15289566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(nz, ja));
1529f6d58c54SBarry Smith         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1530f6d58c54SBarry Smith       }
1531715a17b5SBarry Smith     }
15328f7157efSSatish Balay   } else {
15338f7157efSSatish Balay     *ia = tia;
1534ecc77c7aSBarry Smith     if (ja) *ja = tja;
15358f7157efSSatish Balay   }
15363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15373b2fbd54SBarry Smith }
15383b2fbd54SBarry Smith 
1539d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1540d71ae5a4SJacob Faibussowitsch {
15413a40ed3dSBarry Smith   PetscFunctionBegin;
15423ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1543715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
15449566063dSJacob Faibussowitsch     PetscCall(PetscFree(*ia));
15459566063dSJacob Faibussowitsch     if (ja) PetscCall(PetscFree(*ja));
15463b2fbd54SBarry Smith   }
15473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15483b2fbd54SBarry Smith }
15493b2fbd54SBarry Smith 
1550d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1551d71ae5a4SJacob Faibussowitsch {
15522d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15532d61bbb3SSatish Balay 
1554433994e6SBarry Smith   PetscFunctionBegin;
1555b4e2f619SBarry Smith   if (A->hash_active) {
1556b4e2f619SBarry Smith     PetscInt bs;
1557e3c72094SPierre Jolivet     A->ops[0] = a->cops;
1558b4e2f619SBarry Smith     PetscCall(PetscHMapIJVDestroy(&a->ht));
1559b4e2f619SBarry Smith     PetscCall(MatGetBlockSize(A, &bs));
1560b4e2f619SBarry Smith     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1561b4e2f619SBarry Smith     PetscCall(PetscFree(a->dnz));
1562b4e2f619SBarry Smith     PetscCall(PetscFree(a->bdnz));
1563b4e2f619SBarry Smith     A->hash_active = PETSC_FALSE;
1564b4e2f619SBarry Smith   }
15653ba16761SJacob Faibussowitsch   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
15669566063dSJacob Faibussowitsch   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
15679566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
15689566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
15699566063dSJacob Faibussowitsch   if (a->free_diag) PetscCall(PetscFree(a->diag));
15709566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->idiag));
15719566063dSJacob Faibussowitsch   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
15729566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->solve_work));
15739566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->mult_work));
15749566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_workt));
15759566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_work));
15769566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
15779566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->saved_values));
15789566063dSJacob Faibussowitsch   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1579c4319e64SHong Zhang 
15809566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->sbaijMat));
15819566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->parent));
15829566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1583901853e0SKris Buschelman 
15849566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
15859566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
15869566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
15879566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
15889566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
15899566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
15909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
15919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
15929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
15939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
15949566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
15959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
15967ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
15979566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
15987ea3e4caSstefano_zampini #endif
15999566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
16002e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
16013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16022d61bbb3SSatish Balay }
16032d61bbb3SSatish Balay 
160466976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1605d71ae5a4SJacob Faibussowitsch {
16062d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
16072d61bbb3SSatish Balay 
16082d61bbb3SSatish Balay   PetscFunctionBegin;
1609aa275fccSKris Buschelman   switch (op) {
1610d71ae5a4SJacob Faibussowitsch   case MAT_ROW_ORIENTED:
1611d71ae5a4SJacob Faibussowitsch     a->roworiented = flg;
1612d71ae5a4SJacob Faibussowitsch     break;
1613d71ae5a4SJacob Faibussowitsch   case MAT_KEEP_NONZERO_PATTERN:
1614d71ae5a4SJacob Faibussowitsch     a->keepnonzeropattern = flg;
1615d71ae5a4SJacob Faibussowitsch     break;
1616d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATIONS:
1617d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? 0 : 1);
1618d71ae5a4SJacob Faibussowitsch     break;
1619d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATION_ERR:
1620d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -1 : 0);
1621d71ae5a4SJacob Faibussowitsch     break;
1622d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1623d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -2 : 0);
1624d71ae5a4SJacob Faibussowitsch     break;
1625d71ae5a4SJacob Faibussowitsch   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1626d71ae5a4SJacob Faibussowitsch     a->nounused = (flg ? -1 : 0);
1627d71ae5a4SJacob Faibussowitsch     break;
1628d71ae5a4SJacob Faibussowitsch   default:
1629888c827cSStefano Zampini     break;
16302d61bbb3SSatish Balay   }
16313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16322d61bbb3SSatish Balay }
16332d61bbb3SSatish Balay 
163452768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
1635d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1636d71ae5a4SJacob Faibussowitsch {
163752768537SHong Zhang   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
163852768537SHong Zhang   MatScalar   *aa_i;
163987828ca2SBarry Smith   PetscScalar *v_i;
16402d61bbb3SSatish Balay 
16412d61bbb3SSatish Balay   PetscFunctionBegin;
1642d0f46423SBarry Smith   bs  = A->rmap->bs;
164352768537SHong Zhang   bs2 = bs * bs;
16445f80ce2aSJacob Faibussowitsch   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
16452d61bbb3SSatish Balay 
16462d61bbb3SSatish Balay   bn  = row / bs; /* Block number */
16472d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
16482d61bbb3SSatish Balay   M   = ai[bn + 1] - ai[bn];
16492d61bbb3SSatish Balay   *nz = bs * M;
16502d61bbb3SSatish Balay 
16512d61bbb3SSatish Balay   if (v) {
1652f4259b30SLisandro Dalcin     *v = NULL;
16532d61bbb3SSatish Balay     if (*nz) {
16549566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, v));
16552d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16562d61bbb3SSatish Balay         v_i  = *v + i * bs;
16572d61bbb3SSatish Balay         aa_i = aa + bs2 * (ai[bn] + i);
165826fbe8dcSKarl Rupp         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
16592d61bbb3SSatish Balay       }
16602d61bbb3SSatish Balay     }
16612d61bbb3SSatish Balay   }
16622d61bbb3SSatish Balay 
16632d61bbb3SSatish Balay   if (idx) {
1664f4259b30SLisandro Dalcin     *idx = NULL;
16652d61bbb3SSatish Balay     if (*nz) {
16669566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, idx));
16672d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16682d61bbb3SSatish Balay         idx_i = *idx + i * bs;
16692d61bbb3SSatish Balay         itmp  = bs * aj[ai[bn] + i];
167026fbe8dcSKarl Rupp         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
16712d61bbb3SSatish Balay       }
16722d61bbb3SSatish Balay     }
16732d61bbb3SSatish Balay   }
16743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16752d61bbb3SSatish Balay }
16762d61bbb3SSatish Balay 
1677d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1678d71ae5a4SJacob Faibussowitsch {
167952768537SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
168052768537SHong Zhang 
168152768537SHong Zhang   PetscFunctionBegin;
16829566063dSJacob Faibussowitsch   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
16833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
168452768537SHong Zhang }
168552768537SHong Zhang 
1686d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1687d71ae5a4SJacob Faibussowitsch {
16882d61bbb3SSatish Balay   PetscFunctionBegin;
16899566063dSJacob Faibussowitsch   if (idx) PetscCall(PetscFree(*idx));
16909566063dSJacob Faibussowitsch   if (v) PetscCall(PetscFree(*v));
16913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16922d61bbb3SSatish Balay }
16932d61bbb3SSatish Balay 
169466976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1695d71ae5a4SJacob Faibussowitsch {
169620e84f26SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
16972d61bbb3SSatish Balay   Mat          C;
169820e84f26SHong Zhang   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
169920e84f26SHong Zhang   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
170020e84f26SHong Zhang   MatScalar   *ata, *aa = a->a;
17012d61bbb3SSatish Balay 
17022d61bbb3SSatish Balay   PetscFunctionBegin;
17037fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
17049566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1705cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
170620e84f26SHong Zhang     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
17072d61bbb3SSatish Balay 
17089566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
17099566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
17109566063dSJacob Faibussowitsch     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
17119566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
171220e84f26SHong Zhang 
171320e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
171420e84f26SHong Zhang     ati = at->i;
171520e84f26SHong Zhang     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1716fc4dec0aSBarry Smith   } else {
1717fc4dec0aSBarry Smith     C   = *B;
171820e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
171920e84f26SHong Zhang     ati = at->i;
1720fc4dec0aSBarry Smith   }
1721fc4dec0aSBarry Smith 
172220e84f26SHong Zhang   atj = at->j;
172320e84f26SHong Zhang   ata = at->a;
172420e84f26SHong Zhang 
172520e84f26SHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
17269566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(atfill, ati, nbs));
172720e84f26SHong Zhang 
172820e84f26SHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
17292d61bbb3SSatish Balay   for (i = 0; i < mbs; i++) {
173020e84f26SHong Zhang     anzj = ai[i + 1] - ai[i];
173120e84f26SHong Zhang     for (j = 0; j < anzj; j++) {
173220e84f26SHong Zhang       atj[atfill[*aj]] = i;
173320e84f26SHong Zhang       for (kr = 0; kr < bs; kr++) {
1734ad540459SPierre Jolivet         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
17352d61bbb3SSatish Balay       }
173620e84f26SHong Zhang       atfill[*aj++] += 1;
173720e84f26SHong Zhang     }
173820e84f26SHong Zhang   }
17399566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
17409566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
17412d61bbb3SSatish Balay 
174220e84f26SHong Zhang   /* Clean up temporary space and complete requests. */
17439566063dSJacob Faibussowitsch   PetscCall(PetscFree(atfill));
174420e84f26SHong Zhang 
1745cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
174658b7e2c1SStefano Zampini     PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
17472d61bbb3SSatish Balay     *B = C;
17482d61bbb3SSatish Balay   } else {
17499566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &C));
17502d61bbb3SSatish Balay   }
17513ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17522d61bbb3SSatish Balay }
17532d61bbb3SSatish Balay 
1754*28636b0cSPierre Jolivet static PetscErrorCode MatCompare_SeqBAIJ_Private(Mat A, Mat B, PetscReal tol, PetscBool *flg)
1755*28636b0cSPierre Jolivet {
1756*28636b0cSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1757*28636b0cSPierre Jolivet 
1758*28636b0cSPierre Jolivet   PetscFunctionBegin;
1759*28636b0cSPierre Jolivet   /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1760*28636b0cSPierre Jolivet   if (A->rmap->N != B->rmap->N || A->cmap->n != B->cmap->n || A->rmap->bs != B->rmap->bs || a->nz != b->nz) {
1761*28636b0cSPierre Jolivet     *flg = PETSC_FALSE;
1762*28636b0cSPierre Jolivet     PetscFunctionReturn(PETSC_SUCCESS);
1763*28636b0cSPierre Jolivet   }
1764*28636b0cSPierre Jolivet 
1765*28636b0cSPierre Jolivet   /* if the a->i are the same */
1766*28636b0cSPierre Jolivet   PetscCall(PetscArraycmp(a->i, b->i, a->mbs + 1, flg));
1767*28636b0cSPierre Jolivet   if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
1768*28636b0cSPierre Jolivet 
1769*28636b0cSPierre Jolivet   /* if a->j are the same */
1770*28636b0cSPierre Jolivet   PetscCall(PetscArraycmp(a->j, b->j, a->nz, flg));
1771*28636b0cSPierre Jolivet   if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
1772*28636b0cSPierre Jolivet 
1773*28636b0cSPierre Jolivet   if (tol == 0.0) PetscCall(PetscArraycmp(a->a, b->a, a->nz * A->rmap->bs * A->rmap->bs, flg)); /* if a->a are the same */
1774*28636b0cSPierre Jolivet   else {
1775*28636b0cSPierre Jolivet     *flg = PETSC_TRUE;
1776*28636b0cSPierre Jolivet     for (PetscInt i = 0; (i < a->nz * A->rmap->bs * A->rmap->bs) && *flg; ++i)
1777*28636b0cSPierre Jolivet       if (PetscAbsScalar(a->a[i] - b->a[i]) > tol) *flg = PETSC_FALSE;
1778*28636b0cSPierre Jolivet   }
1779*28636b0cSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
1780*28636b0cSPierre Jolivet }
1781*28636b0cSPierre Jolivet 
1782ff6a9541SJacob Faibussowitsch static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1783d71ae5a4SJacob Faibussowitsch {
1784453d3561SHong Zhang   Mat Btrans;
1785453d3561SHong Zhang 
1786453d3561SHong Zhang   PetscFunctionBegin;
1787acd337a6SBarry Smith   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1788*28636b0cSPierre Jolivet   PetscCall(MatCompare_SeqBAIJ_Private(A, Btrans, tol, f));
17899566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Btrans));
17903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1791453d3561SHong Zhang }
1792453d3561SHong Zhang 
1793*28636b0cSPierre Jolivet static PetscErrorCode MatEqual_SeqBAIJ(Mat A, Mat B, PetscBool *flg)
1794*28636b0cSPierre Jolivet {
1795*28636b0cSPierre Jolivet   PetscFunctionBegin;
1796*28636b0cSPierre Jolivet   PetscCall(MatCompare_SeqBAIJ_Private(A, B, 0.0, flg));
1797*28636b0cSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
1798*28636b0cSPierre Jolivet }
1799*28636b0cSPierre Jolivet 
1800618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1801d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1802d71ae5a4SJacob Faibussowitsch {
1803b51a4376SLisandro Dalcin   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1804b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1805b51a4376SLisandro Dalcin   PetscInt    *rowlens, *colidxs;
1806b51a4376SLisandro Dalcin   PetscScalar *matvals;
18072593348eSBarry Smith 
18083a40ed3dSBarry Smith   PetscFunctionBegin;
18099566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
18103b2fbd54SBarry Smith 
1811b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1812b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1813b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1814b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1815b51a4376SLisandro Dalcin   nz = bs * bs * A->nz;
18162593348eSBarry Smith 
1817b51a4376SLisandro Dalcin   /* write matrix header */
1818b51a4376SLisandro Dalcin   header[0] = MAT_FILE_CLASSID;
18199371c9d4SSatish Balay   header[1] = M;
18209371c9d4SSatish Balay   header[2] = N;
18219371c9d4SSatish Balay   header[3] = nz;
18229566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
18232593348eSBarry Smith 
1824b51a4376SLisandro Dalcin   /* store row lengths */
18259566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rowlens));
1826b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
18279371c9d4SSatish Balay     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
18289566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
18299566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
1830b51a4376SLisandro Dalcin 
1831b51a4376SLisandro Dalcin   /* store column indices  */
18329566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &colidxs));
1833b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1834b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1835b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
18369371c9d4SSatish Balay         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
18375f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
18389566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
18399566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
18402593348eSBarry Smith 
18412593348eSBarry Smith   /* store nonzero values */
18429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &matvals));
1843b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1844b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1845b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
18469371c9d4SSatish Balay         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
18475f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
18489566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
18499566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
1850ce6f0cecSBarry Smith 
1851b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
18529566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
18533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18542593348eSBarry Smith }
18552593348eSBarry Smith 
1856d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1857d71ae5a4SJacob Faibussowitsch {
18587dc0baabSHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
18597dc0baabSHong Zhang   PetscInt     i, bs = A->rmap->bs, k;
18607dc0baabSHong Zhang 
18617dc0baabSHong Zhang   PetscFunctionBegin;
18629566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
18637dc0baabSHong Zhang   for (i = 0; i < a->mbs; i++) {
18649566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
186548a46eb9SPierre Jolivet     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
18669566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18677dc0baabSHong Zhang   }
18689566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18707dc0baabSHong Zhang }
18717dc0baabSHong Zhang 
1872d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1873d71ae5a4SJacob Faibussowitsch {
1874b6490206SBarry Smith   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1875d0f46423SBarry Smith   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1876f3ef73ceSBarry Smith   PetscViewerFormat format;
18772593348eSBarry Smith 
18783a40ed3dSBarry Smith   PetscFunctionBegin;
18797dc0baabSHong Zhang   if (A->structure_only) {
18809566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
18813ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
18827dc0baabSHong Zhang   }
18837dc0baabSHong Zhang 
18849566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
1885456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
18869566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1887fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1888ade3a672SBarry Smith     const char *matname;
1889bcd9e38bSBarry Smith     Mat         aij;
18909566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
18919566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
18929566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
18939566063dSJacob Faibussowitsch     PetscCall(MatView(aij, viewer));
18949566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&aij));
189504929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
18963ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1897fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
18989566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
189944cd7ae7SLois Curfman McInnes     for (i = 0; i < a->mbs; i++) {
190044cd7ae7SLois Curfman McInnes       for (j = 0; j < bs; j++) {
19019566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
190244cd7ae7SLois Curfman McInnes         for (k = a->i[i]; k < a->i[i + 1]; k++) {
190344cd7ae7SLois Curfman McInnes           for (l = 0; l < bs; l++) {
1904aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
19050e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
19069371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
19070e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
19089371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
19090e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
19109566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
19110ef38995SBarry Smith             }
191244cd7ae7SLois Curfman McInnes #else
191348a46eb9SPierre Jolivet             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
191444cd7ae7SLois Curfman McInnes #endif
191544cd7ae7SLois Curfman McInnes           }
191644cd7ae7SLois Curfman McInnes         }
19179566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
191844cd7ae7SLois Curfman McInnes       }
191944cd7ae7SLois Curfman McInnes     }
19209566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
19210ef38995SBarry Smith   } else {
19229566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1923b6490206SBarry Smith     for (i = 0; i < a->mbs; i++) {
1924b6490206SBarry Smith       for (j = 0; j < bs; j++) {
19259566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1926b6490206SBarry Smith         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1927b6490206SBarry Smith           for (l = 0; l < bs; l++) {
1928aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
19290e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
19309371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
19310e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
19329371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
19330ef38995SBarry Smith             } else {
19349566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
193588685aaeSLois Curfman McInnes             }
193688685aaeSLois Curfman McInnes #else
19379566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
193888685aaeSLois Curfman McInnes #endif
19392593348eSBarry Smith           }
19402593348eSBarry Smith         }
19419566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
19422593348eSBarry Smith       }
19432593348eSBarry Smith     }
19449566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1945b6490206SBarry Smith   }
19469566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
19473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19482593348eSBarry Smith }
19492593348eSBarry Smith 
19509804daf3SBarry Smith #include <petscdraw.h>
1951d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1952d71ae5a4SJacob Faibussowitsch {
195377ed5343SBarry Smith   Mat               A = (Mat)Aa;
19543270192aSSatish Balay   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
19556497c311SBarry Smith   PetscInt          row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
19560e6d2581SBarry Smith   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
19573f1db9ecSBarry Smith   MatScalar        *aa;
1958b0a32e0cSBarry Smith   PetscViewer       viewer;
1959b3e7f47fSJed Brown   PetscViewerFormat format;
19606497c311SBarry Smith   int               color;
19613270192aSSatish Balay 
19623a40ed3dSBarry Smith   PetscFunctionBegin;
19639566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
19649566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
19659566063dSJacob Faibussowitsch   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
196677ed5343SBarry Smith 
19673270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1968b3e7f47fSJed Brown 
1969b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1970d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1971383922c3SLisandro Dalcin     /* Blue for negative, Cyan for zero and  Red for positive */
1972b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
19733270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19743270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19759371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19769371c9d4SSatish Balay         y_r = y_l + 1.0;
19779371c9d4SSatish Balay         x_l = a->j[j] * bs;
19789371c9d4SSatish Balay         x_r = x_l + 1.0;
19793270192aSSatish Balay         aa  = a->a + j * bs2;
19803270192aSSatish Balay         for (k = 0; k < bs; k++) {
19813270192aSSatish Balay           for (l = 0; l < bs; l++) {
19820e6d2581SBarry Smith             if (PetscRealPart(*aa++) >= 0.) continue;
19839566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19843270192aSSatish Balay           }
19853270192aSSatish Balay         }
19863270192aSSatish Balay       }
19873270192aSSatish Balay     }
1988b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
19893270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19903270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19919371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19929371c9d4SSatish Balay         y_r = y_l + 1.0;
19939371c9d4SSatish Balay         x_l = a->j[j] * bs;
19949371c9d4SSatish Balay         x_r = x_l + 1.0;
19953270192aSSatish Balay         aa  = a->a + j * bs2;
19963270192aSSatish Balay         for (k = 0; k < bs; k++) {
19973270192aSSatish Balay           for (l = 0; l < bs; l++) {
19980e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
19999566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
20003270192aSSatish Balay           }
20013270192aSSatish Balay         }
20023270192aSSatish Balay       }
20033270192aSSatish Balay     }
2004b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
20053270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
20063270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
20079371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
20089371c9d4SSatish Balay         y_r = y_l + 1.0;
20099371c9d4SSatish Balay         x_l = a->j[j] * bs;
20109371c9d4SSatish Balay         x_r = x_l + 1.0;
20113270192aSSatish Balay         aa  = a->a + j * bs2;
20123270192aSSatish Balay         for (k = 0; k < bs; k++) {
20133270192aSSatish Balay           for (l = 0; l < bs; l++) {
20140e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
20159566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
20163270192aSSatish Balay           }
20173270192aSSatish Balay         }
20183270192aSSatish Balay       }
20193270192aSSatish Balay     }
2020d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
2021b3e7f47fSJed Brown   } else {
2022b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
2023b3e7f47fSJed Brown     /* first determine max of all nonzero values */
2024b05fc000SLisandro Dalcin     PetscReal minv = 0.0, maxv = 0.0;
2025b3e7f47fSJed Brown     PetscDraw popup;
2026b3e7f47fSJed Brown 
2027b3e7f47fSJed Brown     for (i = 0; i < a->nz * a->bs2; i++) {
2028b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
2029b3e7f47fSJed Brown     }
2030383922c3SLisandro Dalcin     if (minv >= maxv) maxv = minv + PETSC_SMALL;
20319566063dSJacob Faibussowitsch     PetscCall(PetscDrawGetPopup(draw, &popup));
20329566063dSJacob Faibussowitsch     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
2033383922c3SLisandro Dalcin 
2034d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
2035b3e7f47fSJed Brown     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2036b3e7f47fSJed Brown       for (j = a->i[i]; j < a->i[i + 1]; j++) {
20379371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
20389371c9d4SSatish Balay         y_r = y_l + 1.0;
20399371c9d4SSatish Balay         x_l = a->j[j] * bs;
20409371c9d4SSatish Balay         x_r = x_l + 1.0;
2041b3e7f47fSJed Brown         aa  = a->a + j * bs2;
2042b3e7f47fSJed Brown         for (k = 0; k < bs; k++) {
2043b3e7f47fSJed Brown           for (l = 0; l < bs; l++) {
2044383922c3SLisandro Dalcin             MatScalar v = *aa++;
2045383922c3SLisandro Dalcin             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
20469566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2047b3e7f47fSJed Brown           }
2048b3e7f47fSJed Brown         }
2049b3e7f47fSJed Brown       }
2050b3e7f47fSJed Brown     }
2051d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
2052b3e7f47fSJed Brown   }
20533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
205477ed5343SBarry Smith }
20553270192aSSatish Balay 
2056d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2057d71ae5a4SJacob Faibussowitsch {
20580e6d2581SBarry Smith   PetscReal xl, yl, xr, yr, w, h;
2059b0a32e0cSBarry Smith   PetscDraw draw;
2060ace3abfcSBarry Smith   PetscBool isnull;
20613270192aSSatish Balay 
206277ed5343SBarry Smith   PetscFunctionBegin;
20639566063dSJacob Faibussowitsch   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
20649566063dSJacob Faibussowitsch   PetscCall(PetscDrawIsNull(draw, &isnull));
20653ba16761SJacob Faibussowitsch   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
206677ed5343SBarry Smith 
20679371c9d4SSatish Balay   xr = A->cmap->n;
20689371c9d4SSatish Balay   yr = A->rmap->N;
20699371c9d4SSatish Balay   h  = yr / 10.0;
20709371c9d4SSatish Balay   w  = xr / 10.0;
20719371c9d4SSatish Balay   xr += w;
20729371c9d4SSatish Balay   yr += h;
20739371c9d4SSatish Balay   xl = -w;
20749371c9d4SSatish Balay   yl = -h;
20759566063dSJacob Faibussowitsch   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
20769566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
20779566063dSJacob Faibussowitsch   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
20789566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
20799566063dSJacob Faibussowitsch   PetscCall(PetscDrawSave(draw));
20803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20813270192aSSatish Balay }
20823270192aSSatish Balay 
2083d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2084d71ae5a4SJacob Faibussowitsch {
20859f196a02SMartin Diehl   PetscBool isascii, isbinary, isdraw;
20862593348eSBarry Smith 
20873a40ed3dSBarry Smith   PetscFunctionBegin;
20889f196a02SMartin Diehl   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
20899566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
20909566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
20919f196a02SMartin Diehl   if (isascii) {
20929566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
20930f5bd95cSBarry Smith   } else if (isbinary) {
20949566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
20950f5bd95cSBarry Smith   } else if (isdraw) {
20969566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
20975cd90555SBarry Smith   } else {
2098a5e6ed63SBarry Smith     Mat B;
20999566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
21009566063dSJacob Faibussowitsch     PetscCall(MatView(B, viewer));
21019566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
21022593348eSBarry Smith   }
21033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
21042593348eSBarry Smith }
2105b6490206SBarry Smith 
2106d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2107d71ae5a4SJacob Faibussowitsch {
2108cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2109c1ac3661SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2110c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
2111d0f46423SBarry Smith   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
211297e567efSBarry Smith   MatScalar   *ap, *aa = a->a;
2113cd0e1443SSatish Balay 
21143a40ed3dSBarry Smith   PetscFunctionBegin;
21152d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over rows */
21169371c9d4SSatish Balay     row  = im[k];
21179371c9d4SSatish Balay     brow = row / bs;
21189371c9d4SSatish Balay     if (row < 0) {
21199371c9d4SSatish Balay       v += n;
21209371c9d4SSatish Balay       continue;
21219371c9d4SSatish Balay     } /* negative row */
212254c59aa7SJacob Faibussowitsch     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
21238e3a54c0SPierre Jolivet     rp   = PetscSafePointerPlusOffset(aj, ai[brow]);
21248e3a54c0SPierre Jolivet     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
21252c3acbe9SBarry Smith     nrow = ailen[brow];
21262d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over columns */
21279371c9d4SSatish Balay       if (in[l] < 0) {
21289371c9d4SSatish Balay         v++;
21299371c9d4SSatish Balay         continue;
21309371c9d4SSatish Balay       } /* negative column */
213154c59aa7SJacob Faibussowitsch       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
21322d61bbb3SSatish Balay       col  = in[l];
21332d61bbb3SSatish Balay       bcol = col / bs;
21342d61bbb3SSatish Balay       cidx = col % bs;
21352d61bbb3SSatish Balay       ridx = row % bs;
21362d61bbb3SSatish Balay       high = nrow;
21372d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
21382d61bbb3SSatish Balay       while (high - low > 5) {
2139cd0e1443SSatish Balay         t = (low + high) / 2;
2140cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
2141cd0e1443SSatish Balay         else low = t;
2142cd0e1443SSatish Balay       }
2143cd0e1443SSatish Balay       for (i = low; i < high; i++) {
2144cd0e1443SSatish Balay         if (rp[i] > bcol) break;
2145cd0e1443SSatish Balay         if (rp[i] == bcol) {
21462d61bbb3SSatish Balay           *v++ = ap[bs2 * i + bs * cidx + ridx];
21472d61bbb3SSatish Balay           goto finished;
2148cd0e1443SSatish Balay         }
2149cd0e1443SSatish Balay       }
215097e567efSBarry Smith       *v++ = 0.0;
21512d61bbb3SSatish Balay     finished:;
2152cd0e1443SSatish Balay     }
2153cd0e1443SSatish Balay   }
21543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2155cd0e1443SSatish Balay }
2156cd0e1443SSatish Balay 
/*
  MatSetValuesBlocked_SeqBAIJ - Inserts or adds an m x n array of bs x bs blocks into a SeqBAIJ
  matrix, addressing rows and columns by BLOCK index.

  Input Parameters:
    A  - the matrix
    m  - number of block rows being set
    im - block row indices; negative entries are skipped
    n  - number of block columns being set
    in - block column indices; negative entries are skipped
    v  - the (m*bs) x (n*bs) scalar values, read row by row when a->roworiented is set and
         column by column otherwise
    is - ADD_VALUES accumulates into blocks that already exist; any other mode overwrites them

  Notes:
  Inside a stored block the entry at (r, c) is kept at offset r + bs*c.

  A block row index >= a->mbs or a block column index >= a->nbs raises PETSC_ERR_ARG_OUTOFRANGE.

  When the target block is not yet in the row: nonew == 1 silently drops it, nonew == -1 raises an
  error, otherwise the column index is inserted in sorted position and the block is assigned
  (not accumulated, whatever the value of is).

  When A->structure_only is set only the column indices are maintained; no values are read or stored.
*/
2157d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2158d71ae5a4SJacob Faibussowitsch {
215992c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2160e2ee6c50SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2161c1ac3661SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2162d0f46423SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2163ace3abfcSBarry Smith   PetscBool          roworiented = a->roworiented;
2164dd6ea824SBarry Smith   const PetscScalar *value       = v;
21659d243f67SHong Zhang   MatScalar         *ap = NULL, *aa = a->a, *bap;
216692c4ed94SBarry Smith 
21673a40ed3dSBarry Smith   PetscFunctionBegin;
  /* stepval: scalars to skip in v, after reading bs of them, to reach the same block on the next input row (or column) */
21680e324ae4SSatish Balay   if (roworiented) {
21690e324ae4SSatish Balay     stepval = (n - 1) * bs;
21700e324ae4SSatish Balay   } else {
21710e324ae4SSatish Balay     stepval = (m - 1) * bs;
21720e324ae4SSatish Balay   }
217392c4ed94SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
217492c4ed94SBarry Smith     row = im[k];
21755ef9f2a5SBarry Smith     if (row < 0) continue;
21766bdcaf15SBarry Smith     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
    /* rp/ap: column indices and values of this block row; rmax: slots allocated; nrow: slots in use */
217792c4ed94SBarry Smith     rp = aj + ai[row];
21787dc0baabSHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[row];
217992c4ed94SBarry Smith     rmax = imax[row];
218092c4ed94SBarry Smith     nrow = ailen[row];
218192c4ed94SBarry Smith     low  = 0;
2182c71e6ed7SBarry Smith     high = nrow;
218392c4ed94SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
21845ef9f2a5SBarry Smith       if (in[l] < 0) continue;
21856bdcaf15SBarry Smith       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
218692c4ed94SBarry Smith       col = in[l];
      /* point value at the first scalar of input block (k, l) */
21877dc0baabSHong Zhang       if (!A->structure_only) {
218892c4ed94SBarry Smith         if (roworiented) {
218953ef36baSBarry Smith           value = v + (k * (stepval + bs) + l) * bs;
21900e324ae4SSatish Balay         } else {
219153ef36baSBarry Smith           value = v + (l * (stepval + bs) + k) * bs;
219292c4ed94SBarry Smith         }
21937dc0baabSHong Zhang       }
      /* keep the lower bound left by the previous column only while columns arrive in increasing order */
219426fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
219526fbe8dcSKarl Rupp       else high = nrow;
2196e2ee6c50SBarry Smith       lastcol = col;
      /* bisect until the window holds at most 7 entries, then scan it linearly */
219792c4ed94SBarry Smith       while (high - low > 7) {
219892c4ed94SBarry Smith         t = (low + high) / 2;
219992c4ed94SBarry Smith         if (rp[t] > col) high = t;
220092c4ed94SBarry Smith         else low = t;
220192c4ed94SBarry Smith       }
220292c4ed94SBarry Smith       for (i = low; i < high; i++) {
220392c4ed94SBarry Smith         if (rp[i] > col) break;
220492c4ed94SBarry Smith         if (rp[i] == col) {
          /* block already present: update it in place */
22057dc0baabSHong Zhang           if (A->structure_only) goto noinsert2;
22068a84c255SSatish Balay           bap = ap + bs2 * i;
22070e324ae4SSatish Balay           if (roworiented) {
            /* input row ii is scattered with stride bs, i.e. to offsets ii, ii + bs, ii + 2*bs, ... of the stored block */
22088a84c255SSatish Balay             if (is == ADD_VALUES) {
2209dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2210ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2211dd9472c6SBarry Smith               }
22120e324ae4SSatish Balay             } else {
2213dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2214ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2215dd9472c6SBarry Smith               }
2216dd9472c6SBarry Smith             }
22170e324ae4SSatish Balay           } else {
            /* input column ii is copied contiguously into the next bs entries of the stored block */
22180e324ae4SSatish Balay             if (is == ADD_VALUES) {
221953ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2220ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
222153ef36baSBarry Smith                 bap += bs;
2222dd9472c6SBarry Smith               }
22230e324ae4SSatish Balay             } else {
222453ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2225ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
222653ef36baSBarry Smith                 bap += bs;
22278a84c255SSatish Balay               }
2228dd9472c6SBarry Smith             }
2229dd9472c6SBarry Smith           }
2230f1241b54SBarry Smith           goto noinsert2;
223192c4ed94SBarry Smith         }
223292c4ed94SBarry Smith       }
      /* block not found; i is the position at which it has to be inserted */
223389280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
22345f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      /* NOTE(review): these macros are defined outside this chunk; presumably they enlarge the row storage when it is full and refresh the locals passed by name -- confirm before renaming or reordering any of them */
22357dc0baabSHong Zhang       if (A->structure_only) {
22367dc0baabSHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
22377dc0baabSHong Zhang       } else {
2238fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
22397dc0baabSHong Zhang       }
      /* N is the index of the last entry in use before the insertion; the row then grows by one */
22409371c9d4SSatish Balay       N = nrow++ - 1;
22419371c9d4SSatish Balay       high++;
224292c4ed94SBarry Smith       /* shift up all the later entries in this row */
22439566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
224492c4ed94SBarry Smith       rp[i] = col;
22457dc0baabSHong Zhang       if (!A->structure_only) {
22469566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
22478a84c255SSatish Balay         bap = ap + bs2 * i;
        /* a new block is always assigned, never accumulated */
22480e324ae4SSatish Balay         if (roworiented) {
2249dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2250ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2251dd9472c6SBarry Smith           }
22520e324ae4SSatish Balay         } else {
2253dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2254ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2255dd9472c6SBarry Smith           }
2256dd9472c6SBarry Smith         }
22577dc0baabSHong Zhang       }
2258f1241b54SBarry Smith     noinsert2:;
      /* the next search in this row may start where this one ended */
225992c4ed94SBarry Smith       low = i;
226092c4ed94SBarry Smith     }
    /* record the (possibly grown) number of blocks in use for this row */
226192c4ed94SBarry Smith     ailen[row] = nrow;
226292c4ed94SBarry Smith   }
22633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
226492c4ed94SBarry Smith }
226526e093fcSHong Zhang 
2266d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2267d71ae5a4SJacob Faibussowitsch {
2268584200bdSSatish Balay   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2269580bdb30SBarry Smith   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2270d0f46423SBarry Smith   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2271c1ac3661SBarry Smith   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
22723f1db9ecSBarry Smith   MatScalar   *aa    = a->a, *ap;
22733447b6efSHong Zhang   PetscReal    ratio = 0.6;
2274584200bdSSatish Balay 
22753a40ed3dSBarry Smith   PetscFunctionBegin;
2276d32568d8SPierre Jolivet   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2277584200bdSSatish Balay 
227843ee02c3SBarry Smith   if (m) rmax = ailen[0];
2279584200bdSSatish Balay   for (i = 1; i < mbs; i++) {
2280584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
2281584200bdSSatish Balay     fshift += imax[i - 1] - ailen[i - 1];
2282d402145bSBarry Smith     rmax = PetscMax(rmax, ailen[i]);
2283584200bdSSatish Balay     if (fshift) {
2284580bdb30SBarry Smith       ip = aj + ai[i];
2285580bdb30SBarry Smith       ap = aa + bs2 * ai[i];
2286584200bdSSatish Balay       N  = ailen[i];
22879566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(ip - fshift, ip, N));
228848a46eb9SPierre Jolivet       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2289672ba085SHong Zhang     }
2290584200bdSSatish Balay     ai[i] = ai[i - 1] + ailen[i - 1];
2291584200bdSSatish Balay   }
2292584200bdSSatish Balay   if (mbs) {
2293584200bdSSatish Balay     fshift += imax[mbs - 1] - ailen[mbs - 1];
2294584200bdSSatish Balay     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2295584200bdSSatish Balay   }
22967c565772SBarry Smith 
2297584200bdSSatish Balay   /* reset ilen and imax for each row */
22987c565772SBarry Smith   a->nonzerorowcnt = 0;
2299672ba085SHong Zhang   if (A->structure_only) {
23009566063dSJacob Faibussowitsch     PetscCall(PetscFree2(a->imax, a->ilen));
2301672ba085SHong Zhang   } else { /* !A->structure_only */
2302584200bdSSatish Balay     for (i = 0; i < mbs; i++) {
2303584200bdSSatish Balay       ailen[i] = imax[i] = ai[i + 1] - ai[i];
23047c565772SBarry Smith       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2305584200bdSSatish Balay     }
2306672ba085SHong Zhang   }
2307a7c10996SSatish Balay   a->nz = ai[mbs];
2308584200bdSSatish Balay 
2309584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
2310b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
2311ff6a9541SJacob Faibussowitsch   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
23125f80ce2aSJacob Faibussowitsch   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
23139566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
23149566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
23159566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
231626fbe8dcSKarl Rupp 
23178e58a170SBarry Smith   A->info.mallocs += a->reallocs;
2318e2f3b5e9SSatish Balay   a->reallocs         = 0;
23190e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2320647a6520SHong Zhang   a->rmax             = rmax;
2321cf4441caSHong Zhang 
232248a46eb9SPierre Jolivet   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
23233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2324584200bdSSatish Balay }
2325584200bdSSatish Balay 
2326bea157c4SSatish Balay /*
2327bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
2328bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2329a5b23f4aSJose E. Roman    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2330bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
2331bea157c4SSatish Balay */
2332d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2333d71ae5a4SJacob Faibussowitsch {
2334ff6a9541SJacob Faibussowitsch   PetscInt j = 0;
23353a40ed3dSBarry Smith 
2336433994e6SBarry Smith   PetscFunctionBegin;
2337ff6a9541SJacob Faibussowitsch   for (PetscInt i = 0; i < n; j++) {
2338ff6a9541SJacob Faibussowitsch     PetscInt row = idx[i];
2339a5b23f4aSJose E. Roman     if (row % bs != 0) { /* Not the beginning of a block */
2340bea157c4SSatish Balay       sizes[j] = 1;
2341bea157c4SSatish Balay       i++;
2342e4fda26cSSatish Balay     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2343bea157c4SSatish Balay       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2344bea157c4SSatish Balay       i++;
23456aad120cSJose E. Roman     } else { /* Beginning of the block, so check if the complete block exists */
2346ff6a9541SJacob Faibussowitsch       PetscBool flg = PETSC_TRUE;
2347ff6a9541SJacob Faibussowitsch       for (PetscInt k = 1; k < bs; k++) {
2348bea157c4SSatish Balay         if (row + k != idx[i + k]) { /* break in the block */
2349bea157c4SSatish Balay           flg = PETSC_FALSE;
2350bea157c4SSatish Balay           break;
2351d9b7c43dSSatish Balay         }
2352bea157c4SSatish Balay       }
2353abc0a331SBarry Smith       if (flg) { /* No break in the bs */
2354bea157c4SSatish Balay         sizes[j] = bs;
2355bea157c4SSatish Balay         i += bs;
2356bea157c4SSatish Balay       } else {
2357bea157c4SSatish Balay         sizes[j] = 1;
2358bea157c4SSatish Balay         i++;
2359bea157c4SSatish Balay       }
2360bea157c4SSatish Balay     }
2361bea157c4SSatish Balay   }
2362bea157c4SSatish Balay   *bs_max = j;
23633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2364d9b7c43dSSatish Balay }
2365d9b7c43dSSatish Balay 
2366d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2367d71ae5a4SJacob Faibussowitsch {
2368d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2369f4df32b1SMatthew Knepley   PetscInt           i, j, k, count, *rows;
2370d0f46423SBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
237187828ca2SBarry Smith   PetscScalar        zero = 0.0;
23723f1db9ecSBarry Smith   MatScalar         *aa;
237397b48c8fSBarry Smith   const PetscScalar *xx;
237497b48c8fSBarry Smith   PetscScalar       *bb;
2375d9b7c43dSSatish Balay 
23763a40ed3dSBarry Smith   PetscFunctionBegin;
2377dd8e379bSPierre Jolivet   /* fix right-hand side if needed */
237897b48c8fSBarry Smith   if (x && b) {
23799566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
23809566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
2381ad540459SPierre Jolivet     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
23829566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
23839566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
238497b48c8fSBarry Smith   }
238597b48c8fSBarry Smith 
2386d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2387bea157c4SSatish Balay   /* allocate memory for rows,sizes */
23889566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2389bea157c4SSatish Balay 
2390563b5814SBarry Smith   /* copy IS values to rows, and sort them */
239126fbe8dcSKarl Rupp   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
23929566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(is_n, rows));
239397b48c8fSBarry Smith 
2394a9817697SBarry Smith   if (baij->keepnonzeropattern) {
239526fbe8dcSKarl Rupp     for (i = 0; i < is_n; i++) sizes[i] = 1;
2396dffd3267SBarry Smith     bs_max = is_n;
2397dffd3267SBarry Smith   } else {
23989566063dSJacob Faibussowitsch     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2399e56f5c9eSBarry Smith     A->nonzerostate++;
2400dffd3267SBarry Smith   }
2401bea157c4SSatish Balay 
2402bea157c4SSatish Balay   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2403bea157c4SSatish Balay     row = rows[j];
24045f80ce2aSJacob Faibussowitsch     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2405bea157c4SSatish Balay     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2406835f2295SStefano Zampini     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2407a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2408d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2409bea157c4SSatish Balay         if (baij->ilen[row / bs] > 0) {
2410bea157c4SSatish Balay           baij->ilen[row / bs]       = 1;
2411bea157c4SSatish Balay           baij->j[baij->i[row / bs]] = row / bs;
241226fbe8dcSKarl Rupp 
24139566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(aa, count * bs));
2414a07cd24cSSatish Balay         }
2415563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
24169927e4dfSBarry Smith         for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2417f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2418bea157c4SSatish Balay         baij->ilen[row / bs] = 0;
2419f4df32b1SMatthew Knepley       } /* end (diag == 0.0) */
2420bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
24216bdcaf15SBarry Smith       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2422bea157c4SSatish Balay       for (k = 0; k < count; k++) {
2423d9b7c43dSSatish Balay         aa[0] = zero;
2424d9b7c43dSSatish Balay         aa += bs;
2425d9b7c43dSSatish Balay       }
24269927e4dfSBarry Smith       if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2427d9b7c43dSSatish Balay     }
2428bea157c4SSatish Balay   }
2429bea157c4SSatish Balay 
24309566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, sizes));
24319566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
24323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2433d9b7c43dSSatish Balay }
24341c351548SSatish Balay 
2435ff6a9541SJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2436d71ae5a4SJacob Faibussowitsch {
243797b48c8fSBarry Smith   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
243897b48c8fSBarry Smith   PetscInt           i, j, k, count;
243997b48c8fSBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
244097b48c8fSBarry Smith   PetscScalar        zero = 0.0;
244197b48c8fSBarry Smith   MatScalar         *aa;
244297b48c8fSBarry Smith   const PetscScalar *xx;
244397b48c8fSBarry Smith   PetscScalar       *bb;
244456777dd2SBarry Smith   PetscBool         *zeroed, vecs = PETSC_FALSE;
244597b48c8fSBarry Smith 
244697b48c8fSBarry Smith   PetscFunctionBegin;
2447dd8e379bSPierre Jolivet   /* fix right-hand side if needed */
244897b48c8fSBarry Smith   if (x && b) {
24499566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
24509566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
245156777dd2SBarry Smith     vecs = PETSC_TRUE;
245297b48c8fSBarry Smith   }
245397b48c8fSBarry Smith 
245497b48c8fSBarry Smith   /* zero the columns */
24559566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
245697b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
24575f80ce2aSJacob Faibussowitsch     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
245897b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
245997b48c8fSBarry Smith   }
246097b48c8fSBarry Smith   for (i = 0; i < A->rmap->N; i++) {
246197b48c8fSBarry Smith     if (!zeroed[i]) {
246297b48c8fSBarry Smith       row = i / bs;
246397b48c8fSBarry Smith       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
246497b48c8fSBarry Smith         for (k = 0; k < bs; k++) {
246597b48c8fSBarry Smith           col = bs * baij->j[j] + k;
246697b48c8fSBarry Smith           if (zeroed[col]) {
2467835f2295SStefano Zampini             aa = baij->a + j * bs2 + (i % bs) + bs * k;
246856777dd2SBarry Smith             if (vecs) bb[i] -= aa[0] * xx[col];
246997b48c8fSBarry Smith             aa[0] = 0.0;
247097b48c8fSBarry Smith           }
247197b48c8fSBarry Smith         }
247297b48c8fSBarry Smith       }
247356777dd2SBarry Smith     } else if (vecs) bb[i] = diag * xx[i];
247497b48c8fSBarry Smith   }
24759566063dSJacob Faibussowitsch   PetscCall(PetscFree(zeroed));
247656777dd2SBarry Smith   if (vecs) {
24779566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
24789566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
247956777dd2SBarry Smith   }
248097b48c8fSBarry Smith 
248197b48c8fSBarry Smith   /* zero the rows */
248297b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
248397b48c8fSBarry Smith     row   = is_idx[i];
248497b48c8fSBarry Smith     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2485835f2295SStefano Zampini     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
248697b48c8fSBarry Smith     for (k = 0; k < count; k++) {
248797b48c8fSBarry Smith       aa[0] = zero;
248897b48c8fSBarry Smith       aa += bs;
248997b48c8fSBarry Smith     }
2490dbbe0bcdSBarry Smith     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
249197b48c8fSBarry Smith   }
24929566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
24933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
249497b48c8fSBarry Smith }
249597b48c8fSBarry Smith 
/*
  MatSetValues_SeqBAIJ - Inserts or adds an m x n array of scalar values into a SeqBAIJ matrix,
  addressing rows and columns by SCALAR (not block) index.

  Input Parameters:
    A  - the matrix
    m  - number of rows being set
    im - row indices; negative entries are skipped
    n  - number of columns being set
    in - column indices; negative entries are skipped
    v  - the values: entry (k, l) is v[l + k*n] when a->roworiented is set and v[k + l*m] otherwise
    is - ADD_VALUES accumulates into an entry whose block already exists; any other mode overwrites it

  Notes:
  Each (row, col) is split into a block (row/bs, col/bs) and an offset (row%bs, col%bs) inside it;
  the entry lives at offset ridx + bs*cidx of the stored block.

  A row >= A->rmap->N or a column >= A->cmap->n raises PETSC_ERR_ARG_OUTOFRANGE.

  When the target block is not yet in the block row: nonew == 1 silently drops the value,
  nonew == -1 raises an error, otherwise a zeroed block is inserted in sorted position, the value
  is assigned to its entry (not accumulated) and a->nz is incremented.

  When A->structure_only is set only the column indices are maintained; no values are read or stored.
*/
2496d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2497d71ae5a4SJacob Faibussowitsch {
24982d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
  /* NOTE(review): ii is not referenced in the visible body; presumably the reallocation macros below use it -- confirm before removing */
2499e2ee6c50SBarry Smith   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2500c1ac3661SBarry Smith   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2501d0f46423SBarry Smith   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2502c1ac3661SBarry Smith   PetscInt     ridx, cidx, bs2                 = a->bs2;
2503ace3abfcSBarry Smith   PetscBool    roworiented = a->roworiented;
2504d8cdefa3SHong Zhang   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
25052d61bbb3SSatish Balay 
25062d61bbb3SSatish Balay   PetscFunctionBegin;
25072d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over added rows */
2508085a36d4SBarry Smith     row  = im[k];
2509085a36d4SBarry Smith     brow = row / bs;
25105ef9f2a5SBarry Smith     if (row < 0) continue;
25116bdcaf15SBarry Smith     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
    /* rp/ap: column indices and values of block row brow; rmax: slots allocated; nrow: slots in use */
25128e3a54c0SPierre Jolivet     rp = PetscSafePointerPlusOffset(aj, ai[brow]);
25138e3a54c0SPierre Jolivet     if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
25142d61bbb3SSatish Balay     rmax = imax[brow];
25152d61bbb3SSatish Balay     nrow = ailen[brow];
25162d61bbb3SSatish Balay     low  = 0;
2517c71e6ed7SBarry Smith     high = nrow;
25182d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over added columns */
25195ef9f2a5SBarry Smith       if (in[l] < 0) continue;
25206bdcaf15SBarry Smith       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
      /* split the scalar indices into a block column and the offsets inside the block */
25219371c9d4SSatish Balay       col  = in[l];
25229371c9d4SSatish Balay       bcol = col / bs;
25239371c9d4SSatish Balay       ridx = row % bs;
25249371c9d4SSatish Balay       cidx = col % bs;
2525672ba085SHong Zhang       if (!A->structure_only) {
25262d61bbb3SSatish Balay         if (roworiented) {
25275ef9f2a5SBarry Smith           value = v[l + k * n];
25282d61bbb3SSatish Balay         } else {
25292d61bbb3SSatish Balay           value = v[k + l * m];
25302d61bbb3SSatish Balay         }
2531672ba085SHong Zhang       }
      /* keep the lower bound left by the previous column only while columns arrive in increasing order */
25329371c9d4SSatish Balay       if (col <= lastcol) low = 0;
25339371c9d4SSatish Balay       else high = nrow;
2534e2ee6c50SBarry Smith       lastcol = col;
      /* bisect until the window holds at most 7 entries, then scan it linearly */
25352d61bbb3SSatish Balay       while (high - low > 7) {
25362d61bbb3SSatish Balay         t = (low + high) / 2;
25372d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
25382d61bbb3SSatish Balay         else low = t;
25392d61bbb3SSatish Balay       }
25402d61bbb3SSatish Balay       for (i = low; i < high; i++) {
25412d61bbb3SSatish Balay         if (rp[i] > bcol) break;
25422d61bbb3SSatish Balay         if (rp[i] == bcol) {
          /* block already present: update the single entry in place */
25438e3a54c0SPierre Jolivet           bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2544672ba085SHong Zhang           if (!A->structure_only) {
25452d61bbb3SSatish Balay             if (is == ADD_VALUES) *bap += value;
25462d61bbb3SSatish Balay             else *bap = value;
2547672ba085SHong Zhang           }
25482d61bbb3SSatish Balay           goto noinsert1;
25492d61bbb3SSatish Balay         }
25502d61bbb3SSatish Balay       }
      /* block not found; i is the position at which it has to be inserted */
25512d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
25525f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      /* NOTE(review): these macros are defined outside this chunk; presumably they enlarge the row storage when it is full and refresh the locals passed by name -- confirm before renaming or reordering any of them */
2553672ba085SHong Zhang       if (A->structure_only) {
2554672ba085SHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2555672ba085SHong Zhang       } else {
2556fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2557672ba085SHong Zhang       }
      /* N is the index of the last entry in use before the insertion; the row then grows by one */
25589371c9d4SSatish Balay       N = nrow++ - 1;
25599371c9d4SSatish Balay       high++;
25602d61bbb3SSatish Balay       /* shift up all the later entries in this row */
25619566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
25622d61bbb3SSatish Balay       rp[i] = bcol;
2563580bdb30SBarry Smith       if (!A->structure_only) {
25649566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
        /* the new block starts out as all zeros except for the entry being set */
25659566063dSJacob Faibussowitsch         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2566580bdb30SBarry Smith         ap[bs2 * i + bs * cidx + ridx] = value;
2567580bdb30SBarry Smith       }
2568085a36d4SBarry Smith       a->nz++;
25692d61bbb3SSatish Balay     noinsert1:;
      /* the next search in this row may start where this one ended */
25702d61bbb3SSatish Balay       low = i;
25712d61bbb3SSatish Balay     }
    /* record the (possibly grown) number of blocks in use for this block row */
25722d61bbb3SSatish Balay     ailen[brow] = nrow;
25732d61bbb3SSatish Balay   }
25743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25752d61bbb3SSatish Balay }
25762d61bbb3SSatish Balay 
2577ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2578d71ae5a4SJacob Faibussowitsch {
25792d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
25802d61bbb3SSatish Balay   Mat          outA;
2581ace3abfcSBarry Smith   PetscBool    row_identity, col_identity;
25822d61bbb3SSatish Balay 
25832d61bbb3SSatish Balay   PetscFunctionBegin;
25845f80ce2aSJacob Faibussowitsch   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
25859566063dSJacob Faibussowitsch   PetscCall(ISIdentity(row, &row_identity));
25869566063dSJacob Faibussowitsch   PetscCall(ISIdentity(col, &col_identity));
25875f80ce2aSJacob Faibussowitsch   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
25882d61bbb3SSatish Balay 
25892d61bbb3SSatish Balay   outA            = inA;
2590d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
25919566063dSJacob Faibussowitsch   PetscCall(PetscFree(inA->solvertype));
25929566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
25932d61bbb3SSatish Balay 
25949566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2595cf242676SKris Buschelman 
25969566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)row));
25979566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
2598c3122656SLisandro Dalcin   a->row = row;
25999566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)col));
26009566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
2601c3122656SLisandro Dalcin   a->col = col;
2602c38d4ed2SBarry Smith 
2603c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
26049566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
26059566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2606c38d4ed2SBarry Smith 
26079566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2608aa624791SPierre Jolivet   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
26099566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(outA, inA, info));
26103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
26112d61bbb3SSatish Balay }
2612d9b7c43dSSatish Balay 
2613ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2614d71ae5a4SJacob Faibussowitsch {
261527a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
261627a8da17SBarry Smith 
261727a8da17SBarry Smith   PetscFunctionBegin;
2618ff6a9541SJacob Faibussowitsch   baij->nz = baij->maxnz;
2619ff6a9541SJacob Faibussowitsch   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2620ff6a9541SJacob Faibussowitsch   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
26213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
262227a8da17SBarry Smith }
262327a8da17SBarry Smith 
262427a8da17SBarry Smith /*@
2625d8a51d2aSBarry Smith   MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
262627a8da17SBarry Smith 
262727a8da17SBarry Smith   Input Parameters:
262811a5261eSBarry Smith + mat     - the `MATSEQBAIJ` matrix
2629d8a51d2aSBarry Smith - indices - the block column indices
263027a8da17SBarry Smith 
263115091d37SBarry Smith   Level: advanced
263215091d37SBarry Smith 
263327a8da17SBarry Smith   Notes:
263427a8da17SBarry Smith   This can be called if you have precomputed the nonzero structure of the
263527a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
263611a5261eSBarry Smith   of the `MatSetValues()` operation.
263727a8da17SBarry Smith 
263827a8da17SBarry Smith   You MUST have set the correct numbers of nonzeros per row in the call to
263911a5261eSBarry Smith   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
264027a8da17SBarry Smith 
264111a5261eSBarry Smith   MUST be called before any calls to `MatSetValues()`
264227a8da17SBarry Smith 
26431cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
264427a8da17SBarry Smith @*/
2645d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2646d71ae5a4SJacob Faibussowitsch {
264727a8da17SBarry Smith   PetscFunctionBegin;
26480700a824SBarry Smith   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
26494f572ea9SToby Isaac   PetscAssertPointer(indices, 2);
2650810441c8SPierre Jolivet   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
26513ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
265227a8da17SBarry Smith }
265327a8da17SBarry Smith 
265466976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2655d71ae5a4SJacob Faibussowitsch {
2656273d9f13SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2657c1ac3661SBarry Smith   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2658273d9f13SBarry Smith   PetscReal    atmp;
265987828ca2SBarry Smith   PetscScalar *x, zero = 0.0;
2660273d9f13SBarry Smith   MatScalar   *aa;
2661c1ac3661SBarry Smith   PetscInt     ncols, brow, krow, kcol;
2662273d9f13SBarry Smith 
2663273d9f13SBarry Smith   PetscFunctionBegin;
26645f80ce2aSJacob Faibussowitsch   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2665d0f46423SBarry Smith   bs  = A->rmap->bs;
2666273d9f13SBarry Smith   aa  = a->a;
2667273d9f13SBarry Smith   ai  = a->i;
2668273d9f13SBarry Smith   aj  = a->j;
2669273d9f13SBarry Smith   mbs = a->mbs;
2670273d9f13SBarry Smith 
26719566063dSJacob Faibussowitsch   PetscCall(VecSet(v, zero));
26729566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
26739566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(v, &n));
26745f80ce2aSJacob Faibussowitsch   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2675273d9f13SBarry Smith   for (i = 0; i < mbs; i++) {
26769371c9d4SSatish Balay     ncols = ai[1] - ai[0];
26779371c9d4SSatish Balay     ai++;
2678273d9f13SBarry Smith     brow = bs * i;
2679273d9f13SBarry Smith     for (j = 0; j < ncols; j++) {
2680273d9f13SBarry Smith       for (kcol = 0; kcol < bs; kcol++) {
2681273d9f13SBarry Smith         for (krow = 0; krow < bs; krow++) {
26829371c9d4SSatish Balay           atmp = PetscAbsScalar(*aa);
26839371c9d4SSatish Balay           aa++;
2684273d9f13SBarry Smith           row = brow + krow; /* row index */
26859371c9d4SSatish Balay           if (PetscAbsScalar(x[row]) < atmp) {
26869371c9d4SSatish Balay             x[row] = atmp;
26879371c9d4SSatish Balay             if (idx) idx[row] = bs * (*aj) + kcol;
26889371c9d4SSatish Balay           }
2689273d9f13SBarry Smith         }
2690273d9f13SBarry Smith       }
2691273d9f13SBarry Smith       aj++;
2692273d9f13SBarry Smith     }
2693273d9f13SBarry Smith   }
26949566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
26953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2696273d9f13SBarry Smith }
2697273d9f13SBarry Smith 
2698eede4a3fSMark Adams static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2699eede4a3fSMark Adams {
2700eede4a3fSMark Adams   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2701eede4a3fSMark Adams   PetscInt     i, j, n, row, bs, *ai, mbs;
2702eede4a3fSMark Adams   PetscReal    atmp;
2703eede4a3fSMark Adams   PetscScalar *x, zero = 0.0;
2704eede4a3fSMark Adams   MatScalar   *aa;
2705eede4a3fSMark Adams   PetscInt     ncols, brow, krow, kcol;
2706eede4a3fSMark Adams 
2707eede4a3fSMark Adams   PetscFunctionBegin;
2708eede4a3fSMark Adams   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2709eede4a3fSMark Adams   bs  = A->rmap->bs;
2710eede4a3fSMark Adams   aa  = a->a;
2711eede4a3fSMark Adams   ai  = a->i;
2712eede4a3fSMark Adams   mbs = a->mbs;
2713eede4a3fSMark Adams 
2714eede4a3fSMark Adams   PetscCall(VecSet(v, zero));
2715eede4a3fSMark Adams   PetscCall(VecGetArrayWrite(v, &x));
2716eede4a3fSMark Adams   PetscCall(VecGetLocalSize(v, &n));
2717eede4a3fSMark Adams   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2718eede4a3fSMark Adams   for (i = 0; i < mbs; i++) {
2719eede4a3fSMark Adams     ncols = ai[1] - ai[0];
2720eede4a3fSMark Adams     ai++;
2721eede4a3fSMark Adams     brow = bs * i;
2722eede4a3fSMark Adams     for (j = 0; j < ncols; j++) {
2723eede4a3fSMark Adams       for (kcol = 0; kcol < bs; kcol++) {
2724eede4a3fSMark Adams         for (krow = 0; krow < bs; krow++) {
2725eede4a3fSMark Adams           atmp = PetscAbsScalar(*aa);
2726eede4a3fSMark Adams           aa++;
2727eede4a3fSMark Adams           row = brow + krow; /* row index */
2728eede4a3fSMark Adams           x[row] += atmp;
2729eede4a3fSMark Adams         }
2730eede4a3fSMark Adams       }
2731eede4a3fSMark Adams     }
2732eede4a3fSMark Adams   }
2733eede4a3fSMark Adams   PetscCall(VecRestoreArrayWrite(v, &x));
2734eede4a3fSMark Adams   PetscFunctionReturn(PETSC_SUCCESS);
2735eede4a3fSMark Adams }
2736eede4a3fSMark Adams 
273766976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2738d71ae5a4SJacob Faibussowitsch {
27393c896bc6SHong Zhang   PetscFunctionBegin;
27403c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
27413c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
27423c896bc6SHong Zhang     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
27433c896bc6SHong Zhang     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2744d88c0aacSHong Zhang     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
27453c896bc6SHong Zhang 
27465f80ce2aSJacob Faibussowitsch     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
27475f80ce2aSJacob Faibussowitsch     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
27489566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
27499566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)B));
27503c896bc6SHong Zhang   } else {
27519566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
27523c896bc6SHong Zhang   }
27533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
27543c896bc6SHong Zhang }
27553c896bc6SHong Zhang 
2756d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2757d71ae5a4SJacob Faibussowitsch {
2758f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27596e111a19SKarl Rupp 
2760f2a5309cSSatish Balay   PetscFunctionBegin;
2761f2a5309cSSatish Balay   *array = a->a;
27623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2763f2a5309cSSatish Balay }
2764f2a5309cSSatish Balay 
2765d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2766d71ae5a4SJacob Faibussowitsch {
2767f2a5309cSSatish Balay   PetscFunctionBegin;
2768cda14afcSprj-   *array = NULL;
27693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2770f2a5309cSSatish Balay }
2771f2a5309cSSatish Balay 
2772d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2773d71ae5a4SJacob Faibussowitsch {
2774b264fe52SHong Zhang   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
277552768537SHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
277652768537SHong Zhang   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
277752768537SHong Zhang 
277852768537SHong Zhang   PetscFunctionBegin;
277952768537SHong Zhang   /* Set the number of nonzeros in the new matrix */
27809566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
27813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
278252768537SHong Zhang }
278352768537SHong Zhang 
2784d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2785d71ae5a4SJacob Faibussowitsch {
278642ee4b1aSHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
278731ce2d13SHong Zhang   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2788e838b9e7SJed Brown   PetscBLASInt one = 1;
278942ee4b1aSHong Zhang 
279042ee4b1aSHong Zhang   PetscFunctionBegin;
2791134adf20SPierre Jolivet   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2792134adf20SPierre Jolivet     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2793134adf20SPierre Jolivet     if (e) {
27949566063dSJacob Faibussowitsch       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2795134adf20SPierre Jolivet       if (e) {
27969566063dSJacob Faibussowitsch         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2797134adf20SPierre Jolivet         if (e) str = SAME_NONZERO_PATTERN;
2798134adf20SPierre Jolivet       }
2799134adf20SPierre Jolivet     }
280054c59aa7SJacob Faibussowitsch     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2801134adf20SPierre Jolivet   }
280242ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2803f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2804c5df96a5SBarry Smith     PetscBLASInt bnz;
28059566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2806792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
28079566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2808ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
28099566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
281042ee4b1aSHong Zhang   } else {
281152768537SHong Zhang     Mat       B;
281252768537SHong Zhang     PetscInt *nnz;
281354c59aa7SJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
28149566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
28159566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
28169566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
28179566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
28189566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
28199566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
28209566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
28219566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
28229566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
28239566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
28249566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
282542ee4b1aSHong Zhang   }
28263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
282742ee4b1aSHong Zhang }
282842ee4b1aSHong Zhang 
2829d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2830d71ae5a4SJacob Faibussowitsch {
2831ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
28322726fb6dSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
28332726fb6dSPierre Jolivet   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
28342726fb6dSPierre Jolivet   MatScalar   *aa = a->a;
28352726fb6dSPierre Jolivet 
28362726fb6dSPierre Jolivet   PetscFunctionBegin;
28372726fb6dSPierre Jolivet   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
28383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2839ff6a9541SJacob Faibussowitsch #else
2840ff6a9541SJacob Faibussowitsch   (void)A;
2841ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2842ff6a9541SJacob Faibussowitsch #endif
28432726fb6dSPierre Jolivet }
28442726fb6dSPierre Jolivet 
2845ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2846d71ae5a4SJacob Faibussowitsch {
2847ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
284899cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
284999cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2850dd6ea824SBarry Smith   MatScalar   *aa = a->a;
285199cafbc1SBarry Smith 
285299cafbc1SBarry Smith   PetscFunctionBegin;
285399cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
28543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2855ff6a9541SJacob Faibussowitsch #else
2856ff6a9541SJacob Faibussowitsch   (void)A;
2857ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2858ff6a9541SJacob Faibussowitsch #endif
285999cafbc1SBarry Smith }
286099cafbc1SBarry Smith 
2861ff6a9541SJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2862d71ae5a4SJacob Faibussowitsch {
2863ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
286499cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
286599cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2866dd6ea824SBarry Smith   MatScalar   *aa = a->a;
286799cafbc1SBarry Smith 
286899cafbc1SBarry Smith   PetscFunctionBegin;
286999cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
28703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2871ff6a9541SJacob Faibussowitsch #else
2872ff6a9541SJacob Faibussowitsch   (void)A;
2873ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2874ff6a9541SJacob Faibussowitsch #endif
287599cafbc1SBarry Smith }
287699cafbc1SBarry Smith 
28773acb8795SBarry Smith /*
28782479783cSJose E. Roman     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
28793acb8795SBarry Smith */
2880ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2881d71ae5a4SJacob Faibussowitsch {
28823acb8795SBarry Smith   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
28833acb8795SBarry Smith   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
28843acb8795SBarry Smith   PetscInt     nz = a->i[m], row, *jj, mr, col;
28853acb8795SBarry Smith 
28863acb8795SBarry Smith   PetscFunctionBegin;
28873acb8795SBarry Smith   *nn = n;
28883ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
28895f80ce2aSJacob Faibussowitsch   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
28909566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28929566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28933acb8795SBarry Smith   jj = a->j;
2894ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
28953acb8795SBarry Smith   cia[0] = oshift;
2896ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28979566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
28983acb8795SBarry Smith   jj = a->j;
28993acb8795SBarry Smith   for (row = 0; row < m; row++) {
29003acb8795SBarry Smith     mr = a->i[row + 1] - a->i[row];
29013acb8795SBarry Smith     for (i = 0; i < mr; i++) {
29023acb8795SBarry Smith       col = *jj++;
290326fbe8dcSKarl Rupp 
29043acb8795SBarry Smith       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
29053acb8795SBarry Smith     }
29063acb8795SBarry Smith   }
29079566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
29089371c9d4SSatish Balay   *ia = cia;
29099371c9d4SSatish Balay   *ja = cja;
29103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29113acb8795SBarry Smith }
29123acb8795SBarry Smith 
2913ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2914d71ae5a4SJacob Faibussowitsch {
29153acb8795SBarry Smith   PetscFunctionBegin;
29163ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
29179566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ia));
29189566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ja));
29193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29203acb8795SBarry Smith }
29213acb8795SBarry Smith 
2922525d23c0SHong Zhang /*
2923525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2924525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2925040ebd07SHong Zhang  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2926525d23c0SHong Zhang  */
2927d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2928d71ae5a4SJacob Faibussowitsch {
2929525d23c0SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2930c0349474SHong Zhang   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2931525d23c0SHong Zhang   PetscInt     nz = a->i[m], row, *jj, mr, col;
2932525d23c0SHong Zhang   PetscInt    *cspidx;
2933f6d58c54SBarry Smith 
2934f6d58c54SBarry Smith   PetscFunctionBegin;
2935525d23c0SHong Zhang   *nn = n;
29363ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2937f6d58c54SBarry Smith 
29389566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
29399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
29409566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
29419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cspidx));
2942525d23c0SHong Zhang   jj = a->j;
2943ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2944525d23c0SHong Zhang   cia[0] = oshift;
2945ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
29469566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
2947525d23c0SHong Zhang   jj = a->j;
2948525d23c0SHong Zhang   for (row = 0; row < m; row++) {
2949525d23c0SHong Zhang     mr = a->i[row + 1] - a->i[row];
2950525d23c0SHong Zhang     for (i = 0; i < mr; i++) {
2951525d23c0SHong Zhang       col                                         = *jj++;
2952525d23c0SHong Zhang       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2953525d23c0SHong Zhang       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2954525d23c0SHong Zhang     }
2955525d23c0SHong Zhang   }
29569566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
2957071fcb05SBarry Smith   *ia    = cia;
2958071fcb05SBarry Smith   *ja    = cja;
2959525d23c0SHong Zhang   *spidx = cspidx;
29603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2961f6d58c54SBarry Smith }
2962f6d58c54SBarry Smith 
2963d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2964d71ae5a4SJacob Faibussowitsch {
2965525d23c0SHong Zhang   PetscFunctionBegin;
29669566063dSJacob Faibussowitsch   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
29679566063dSJacob Faibussowitsch   PetscCall(PetscFree(*spidx));
29683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2969f6d58c54SBarry Smith }
297099cafbc1SBarry Smith 
297166976f2fSJacob Faibussowitsch static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2972d71ae5a4SJacob Faibussowitsch {
29737d68702bSBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
29747d68702bSBarry Smith 
29757d68702bSBarry Smith   PetscFunctionBegin;
297648a46eb9SPierre Jolivet   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
29779566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
29783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29797d68702bSBarry Smith }
29807d68702bSBarry Smith 
298117ea310bSPierre Jolivet PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
298217ea310bSPierre Jolivet {
298317ea310bSPierre Jolivet   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
298417ea310bSPierre Jolivet   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
298517ea310bSPierre Jolivet   PetscInt     m = A->rmap->N, *ailen = a->ilen;
298617ea310bSPierre Jolivet   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
298717ea310bSPierre Jolivet   MatScalar   *aa = a->a, *ap;
298817ea310bSPierre Jolivet   PetscBool    zero;
298917ea310bSPierre Jolivet 
299017ea310bSPierre Jolivet   PetscFunctionBegin;
299117ea310bSPierre Jolivet   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
299217ea310bSPierre Jolivet   if (m) rmax = ailen[0];
299317ea310bSPierre Jolivet   for (i = 1; i <= mbs; i++) {
299417ea310bSPierre Jolivet     for (k = ai[i - 1]; k < ai[i]; k++) {
299517ea310bSPierre Jolivet       zero = PETSC_TRUE;
299617ea310bSPierre Jolivet       ap   = aa + bs2 * k;
299717ea310bSPierre Jolivet       for (j = 0; j < bs2 && zero; j++) {
299817ea310bSPierre Jolivet         if (ap[j] != 0.0) zero = PETSC_FALSE;
299917ea310bSPierre Jolivet       }
300017ea310bSPierre Jolivet       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
300117ea310bSPierre Jolivet       else {
300217ea310bSPierre Jolivet         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
300317ea310bSPierre Jolivet         aj[k - fshift] = aj[k];
300417ea310bSPierre Jolivet         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
300517ea310bSPierre Jolivet       }
300617ea310bSPierre Jolivet     }
300717ea310bSPierre Jolivet     ai[i - 1] -= fshift_prev;
300817ea310bSPierre Jolivet     fshift_prev  = fshift;
300917ea310bSPierre Jolivet     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
301017ea310bSPierre Jolivet     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
301117ea310bSPierre Jolivet     rmax = PetscMax(rmax, ailen[i - 1]);
301217ea310bSPierre Jolivet   }
301317ea310bSPierre Jolivet   if (fshift) {
301417ea310bSPierre Jolivet     if (mbs) {
301517ea310bSPierre Jolivet       ai[mbs] -= fshift;
301617ea310bSPierre Jolivet       a->nz = ai[mbs];
301717ea310bSPierre Jolivet     }
301817ea310bSPierre Jolivet     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
301917ea310bSPierre Jolivet     A->nonzerostate++;
302017ea310bSPierre Jolivet     A->info.nz_unneeded += (PetscReal)fshift;
302117ea310bSPierre Jolivet     a->rmax = rmax;
302217ea310bSPierre Jolivet     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
302317ea310bSPierre Jolivet     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
302417ea310bSPierre Jolivet   }
302517ea310bSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
302617ea310bSPierre Jolivet }
302717ea310bSPierre Jolivet 
/*
   MatOps_Values - Positional table of the operations provided for this matrix type.

   The position of an entry inside the initializer selects the operation it implements, so entries
   must never be inserted or removed without keeping all later positions aligned. The numeric comments
   give the index of the entry that follows them. A NULL entry means this table supplies no routine
   for that position.

   NOTE(review): the meaning of each position comes from the declaration of struct _MatOps, which is
   not visible in this part of the file - check it before editing the table.
*/
3028dec0b466SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
3029cc2dc46cSBarry Smith                                        MatGetRow_SeqBAIJ,
3030cc2dc46cSBarry Smith                                        MatRestoreRow_SeqBAIJ,
3031cc2dc46cSBarry Smith                                        MatMult_SeqBAIJ_N,
303297304618SKris Buschelman                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
30337c922b88SBarry Smith                                        MatMultTranspose_SeqBAIJ,
30347c922b88SBarry Smith                                        MatMultTransposeAdd_SeqBAIJ,
3035f4259b30SLisandro Dalcin                                        NULL,
3036f4259b30SLisandro Dalcin                                        NULL,
3037f4259b30SLisandro Dalcin                                        NULL,
3038f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
3039cc2dc46cSBarry Smith                                        MatLUFactor_SeqBAIJ,
3040f4259b30SLisandro Dalcin                                        NULL,
3041f4259b30SLisandro Dalcin                                        NULL,
3042f2501298SSatish Balay                                        MatTranspose_SeqBAIJ,
304397304618SKris Buschelman                                        /* 15*/ MatGetInfo_SeqBAIJ,
3044cc2dc46cSBarry Smith                                        MatEqual_SeqBAIJ,
3045cc2dc46cSBarry Smith                                        MatGetDiagonal_SeqBAIJ,
3046cc2dc46cSBarry Smith                                        MatDiagonalScale_SeqBAIJ,
3047cc2dc46cSBarry Smith                                        MatNorm_SeqBAIJ,
3048f4259b30SLisandro Dalcin                                        /* 20*/ NULL,
3049cc2dc46cSBarry Smith                                        MatAssemblyEnd_SeqBAIJ,
3050cc2dc46cSBarry Smith                                        MatSetOption_SeqBAIJ,
3051cc2dc46cSBarry Smith                                        MatZeroEntries_SeqBAIJ,
3052d519adbfSMatthew Knepley                                        /* 24*/ MatZeroRows_SeqBAIJ,
3053f4259b30SLisandro Dalcin                                        NULL,
3054f4259b30SLisandro Dalcin                                        NULL,
3055f4259b30SLisandro Dalcin                                        NULL,
3056f4259b30SLisandro Dalcin                                        NULL,
305726cec326SBarry Smith                                        /* 29*/ MatSetUp_Seq_Hash,
3058f4259b30SLisandro Dalcin                                        NULL,
3059f4259b30SLisandro Dalcin                                        NULL,
3060f4259b30SLisandro Dalcin                                        NULL,
3061f4259b30SLisandro Dalcin                                        NULL,
3062d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_SeqBAIJ,
3063f4259b30SLisandro Dalcin                                        NULL,
3064f4259b30SLisandro Dalcin                                        NULL,
3065cc2dc46cSBarry Smith                                        MatILUFactor_SeqBAIJ,
3066f4259b30SLisandro Dalcin                                        NULL,
3067d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_SeqBAIJ,
30687dae84e0SHong Zhang                                        MatCreateSubMatrices_SeqBAIJ,
3069cc2dc46cSBarry Smith                                        MatIncreaseOverlap_SeqBAIJ,
3070cc2dc46cSBarry Smith                                        MatGetValues_SeqBAIJ,
30713c896bc6SHong Zhang                                        MatCopy_SeqBAIJ,
3072f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
3073cc2dc46cSBarry Smith                                        MatScale_SeqBAIJ,
30747d68702bSBarry Smith                                        MatShift_SeqBAIJ,
3075f4259b30SLisandro Dalcin                                        NULL,
307697b48c8fSBarry Smith                                        MatZeroRowsColumns_SeqBAIJ,
3077f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
30783b2fbd54SBarry Smith                                        MatGetRowIJ_SeqBAIJ,
307992c4ed94SBarry Smith                                        MatRestoreRowIJ_SeqBAIJ,
30803acb8795SBarry Smith                                        MatGetColumnIJ_SeqBAIJ,
30813acb8795SBarry Smith                                        MatRestoreColumnIJ_SeqBAIJ,
308293dfae19SHong Zhang                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3083f4259b30SLisandro Dalcin                                        NULL,
3084f4259b30SLisandro Dalcin                                        NULL,
3085090001bdSToby Isaac                                        NULL,
3086d3825aa8SBarry Smith                                        MatSetValuesBlocked_SeqBAIJ,
30877dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3088b9b97703SBarry Smith                                        MatDestroy_SeqBAIJ,
3089b9b97703SBarry Smith                                        MatView_SeqBAIJ,
3090f4259b30SLisandro Dalcin                                        NULL,
3091f4259b30SLisandro Dalcin                                        NULL,
3092f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
3093f4259b30SLisandro Dalcin                                        NULL,
3094f4259b30SLisandro Dalcin                                        NULL,
3095f4259b30SLisandro Dalcin                                        NULL,
30968bb0f5c6SPierre Jolivet                                        MatGetRowMaxAbs_SeqBAIJ,
30978bb0f5c6SPierre Jolivet                                        /* 69*/ NULL,
3098c87e5d42SMatthew Knepley                                        MatConvert_Basic,
3099f4259b30SLisandro Dalcin                                        NULL,
3100f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
3101f4259b30SLisandro Dalcin                                        NULL,
31028bb0f5c6SPierre Jolivet                                        /* 74*/ NULL,
3103f4259b30SLisandro Dalcin                                        NULL,
3104f4259b30SLisandro Dalcin                                        NULL,
3105f4259b30SLisandro Dalcin                                        NULL,
31065bba2384SShri Abhyankar                                        MatLoad_SeqBAIJ,
31078bb0f5c6SPierre Jolivet                                        /* 79*/ NULL,
31088bb0f5c6SPierre Jolivet                                        NULL,
31098bb0f5c6SPierre Jolivet                                        NULL,
31108bb0f5c6SPierre Jolivet                                        NULL,
31118bb0f5c6SPierre Jolivet                                        NULL,
3112f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
3113f4259b30SLisandro Dalcin                                        NULL,
3114f4259b30SLisandro Dalcin                                        NULL,
3115f4259b30SLisandro Dalcin                                        NULL,
3116f4259b30SLisandro Dalcin                                        NULL,
3117f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
3118f4259b30SLisandro Dalcin                                        NULL,
3119f4259b30SLisandro Dalcin                                        NULL,
3120f4259b30SLisandro Dalcin                                        NULL,
31218bb0f5c6SPierre Jolivet                                        MatConjugate_SeqBAIJ,
3122f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
3123f4259b30SLisandro Dalcin                                        NULL,
31248bb0f5c6SPierre Jolivet                                        MatRealPart_SeqBAIJ,
31258bb0f5c6SPierre Jolivet                                        MatImaginaryPart_SeqBAIJ,
3126f4259b30SLisandro Dalcin                                        NULL,
3127f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
3128f4259b30SLisandro Dalcin                                        NULL,
3129f4259b30SLisandro Dalcin                                        NULL,
3130f4259b30SLisandro Dalcin                                        NULL,
31318bb0f5c6SPierre Jolivet                                        NULL,
31328bb0f5c6SPierre Jolivet                                        /*104*/ MatMissingDiagonal_SeqBAIJ,
31338bb0f5c6SPierre Jolivet                                        NULL,
31348bb0f5c6SPierre Jolivet                                        NULL,
3135f4259b30SLisandro Dalcin                                        NULL,
3136f4259b30SLisandro Dalcin                                        NULL,
3137f4259b30SLisandro Dalcin                                        /*109*/ NULL,
3138f4259b30SLisandro Dalcin                                        NULL,
3139f4259b30SLisandro Dalcin                                        NULL,
3140547795f9SHong Zhang                                        MatMultHermitianTranspose_SeqBAIJ,
3141d6037b41SHong Zhang                                        MatMultHermitianTransposeAdd_SeqBAIJ,
31428bb0f5c6SPierre Jolivet                                        /*114*/ NULL,
3143f4259b30SLisandro Dalcin                                        NULL,
3144857cbf51SRichard Tran Mills                                        MatGetColumnReductions_SeqBAIJ,
31453964eb88SJed Brown                                        MatInvertBlockDiagonal_SeqBAIJ,
3146f4259b30SLisandro Dalcin                                        NULL,
31478bb0f5c6SPierre Jolivet                                        /*119*/ NULL,
3148f4259b30SLisandro Dalcin                                        NULL,
3149f4259b30SLisandro Dalcin                                        NULL,
3150f4259b30SLisandro Dalcin                                        NULL,
3151f4259b30SLisandro Dalcin                                        NULL,
31528bb0f5c6SPierre Jolivet                                        /*124*/ NULL,
31538bb0f5c6SPierre Jolivet                                        NULL,
31548bb0f5c6SPierre Jolivet                                        NULL,
31558bb0f5c6SPierre Jolivet                                        MatSetBlockSizes_Default,
31568bb0f5c6SPierre Jolivet                                        NULL,
31578bb0f5c6SPierre Jolivet                                        /*129*/ MatFDColoringSetUp_SeqXAIJ,
31588bb0f5c6SPierre Jolivet                                        NULL,
31598bb0f5c6SPierre Jolivet                                        MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
31608bb0f5c6SPierre Jolivet                                        MatDestroySubMatrices_SeqBAIJ,
31618bb0f5c6SPierre Jolivet                                        NULL,
3162f4259b30SLisandro Dalcin                                        /*134*/ NULL,
3163f4259b30SLisandro Dalcin                                        NULL,
3164f4259b30SLisandro Dalcin                                        NULL,
3165eede4a3fSMark Adams                                        MatEliminateZeros_SeqBAIJ,
31664cc2b5b5SPierre Jolivet                                        MatGetRowSumAbs_SeqBAIJ,
31678bb0f5c6SPierre Jolivet                                        /*139*/ NULL,
316842ce410bSJunchao Zhang                                        NULL,
316942ce410bSJunchao Zhang                                        NULL,
317003db1824SAlex Lindsay                                        MatCopyHashToXAIJ_Seq_Hash,
3171c2be7ffeSStefano Zampini                                        NULL,
317203db1824SAlex Lindsay                                        NULL};
31732593348eSBarry Smith 
3174ff6a9541SJacob Faibussowitsch static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3175d71ae5a4SJacob Faibussowitsch {
31763e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
31778ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
31783e90b805SBarry Smith 
31793e90b805SBarry Smith   PetscFunctionBegin;
31805f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
31813e90b805SBarry Smith 
31823e90b805SBarry Smith   /* allocate space for values if not already there */
3183ff6a9541SJacob Faibussowitsch   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
31843e90b805SBarry Smith 
31853e90b805SBarry Smith   /* copy values over */
31869566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
31873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31883e90b805SBarry Smith }
31893e90b805SBarry Smith 
3190ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3191d71ae5a4SJacob Faibussowitsch {
31923e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
31938ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
31943e90b805SBarry Smith 
31953e90b805SBarry Smith   PetscFunctionBegin;
31965f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
31975f80ce2aSJacob Faibussowitsch   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
31983e90b805SBarry Smith 
31993e90b805SBarry Smith   /* copy values over */
32009566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
32013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
32023e90b805SBarry Smith }
32033e90b805SBarry Smith 
3204cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3205cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3206273d9f13SBarry Smith 
3207f9663b93SPierre Jolivet PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3208d71ae5a4SJacob Faibussowitsch {
3209ad79cf63SBarry Smith   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3210535b19f3SBarry Smith   PetscInt     i, mbs, nbs, bs2;
32118afaa268SBarry Smith   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3212a23d5eceSKris Buschelman 
3213a23d5eceSKris Buschelman   PetscFunctionBegin;
3214ad79cf63SBarry Smith   if (B->hash_active) {
3215ad79cf63SBarry Smith     PetscInt bs;
3216aea10558SJacob Faibussowitsch     B->ops[0] = b->cops;
3217ad79cf63SBarry Smith     PetscCall(PetscHMapIJVDestroy(&b->ht));
3218ad79cf63SBarry Smith     PetscCall(MatGetBlockSize(B, &bs));
3219ad79cf63SBarry Smith     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3220ad79cf63SBarry Smith     PetscCall(PetscFree(b->dnz));
3221ad79cf63SBarry Smith     PetscCall(PetscFree(b->bdnz));
3222ad79cf63SBarry Smith     B->hash_active = PETSC_FALSE;
3223ad79cf63SBarry Smith   }
32242576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3225ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
3226ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
3227ab93d7beSBarry Smith     nz             = 0;
3228ab93d7beSBarry Smith   }
32298c07d4e3SBarry Smith 
323058b7e2c1SStefano Zampini   PetscCall(MatSetBlockSize(B, bs));
32319566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
32329566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
32339566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3234899cda47SBarry Smith 
3235899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
3236899cda47SBarry Smith 
3237d0f46423SBarry Smith   mbs = B->rmap->n / bs;
3238d0f46423SBarry Smith   nbs = B->cmap->n / bs;
3239a23d5eceSKris Buschelman   bs2 = bs * bs;
3240a23d5eceSKris Buschelman 
32415f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3242a23d5eceSKris Buschelman 
3243a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
32445f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3245a23d5eceSKris Buschelman   if (nnz) {
3246a23d5eceSKris Buschelman     for (i = 0; i < mbs; i++) {
32475f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
32485f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3249a23d5eceSKris Buschelman     }
3250a23d5eceSKris Buschelman   }
3251a23d5eceSKris Buschelman 
3252d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
32539566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3254d0609cedSBarry Smith   PetscOptionsEnd();
32558c07d4e3SBarry Smith 
3256a23d5eceSKris Buschelman   if (!flg) {
3257a23d5eceSKris Buschelman     switch (bs) {
3258a23d5eceSKris Buschelman     case 1:
3259a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
3260a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3261a23d5eceSKris Buschelman       break;
3262a23d5eceSKris Buschelman     case 2:
3263a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
3264a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3265a23d5eceSKris Buschelman       break;
3266a23d5eceSKris Buschelman     case 3:
3267a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
3268a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3269a23d5eceSKris Buschelman       break;
3270a23d5eceSKris Buschelman     case 4:
3271a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
3272a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3273a23d5eceSKris Buschelman       break;
3274a23d5eceSKris Buschelman     case 5:
3275a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
3276a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3277a23d5eceSKris Buschelman       break;
3278a23d5eceSKris Buschelman     case 6:
3279a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
3280a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3281a23d5eceSKris Buschelman       break;
3282a23d5eceSKris Buschelman     case 7:
3283a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
3284a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3285a23d5eceSKris Buschelman       break;
32869371c9d4SSatish Balay     case 9: {
32876679dcc1SBarry Smith       PetscInt version = 1;
32889566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32896679dcc1SBarry Smith       switch (version) {
32905f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
32916679dcc1SBarry Smith       case 1:
329296e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
329396e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3294835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32956679dcc1SBarry Smith         break;
32966679dcc1SBarry Smith #endif
32976679dcc1SBarry Smith       default:
329896e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_N;
329996e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3300835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
330196e086a2SDaniel Kokron         break;
33026679dcc1SBarry Smith       }
33036679dcc1SBarry Smith       break;
33046679dcc1SBarry Smith     }
3305ebada01fSBarry Smith     case 11:
3306ebada01fSBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_11;
3307ebada01fSBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3308ebada01fSBarry Smith       break;
33099371c9d4SSatish Balay     case 12: {
33106679dcc1SBarry Smith       PetscInt version = 1;
33119566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
33126679dcc1SBarry Smith       switch (version) {
33136679dcc1SBarry Smith       case 1:
33146679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
33156679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3316835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33178ab949d8SShri Abhyankar         break;
33186679dcc1SBarry Smith       case 2:
33196679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
33206679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3321835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33226679dcc1SBarry Smith         break;
33236679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
33246679dcc1SBarry Smith       case 3:
33256679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
33266679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3327835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
33286679dcc1SBarry Smith         break;
33296679dcc1SBarry Smith #endif
3330a23d5eceSKris Buschelman       default:
3331a23d5eceSKris Buschelman         B->ops->mult    = MatMult_SeqBAIJ_N;
3332a23d5eceSKris Buschelman         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3333835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
33346679dcc1SBarry Smith         break;
33356679dcc1SBarry Smith       }
33366679dcc1SBarry Smith       break;
33376679dcc1SBarry Smith     }
33389371c9d4SSatish Balay     case 15: {
33396679dcc1SBarry Smith       PetscInt version = 1;
33409566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
33416679dcc1SBarry Smith       switch (version) {
33426679dcc1SBarry Smith       case 1:
33436679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3344835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33456679dcc1SBarry Smith         break;
33466679dcc1SBarry Smith       case 2:
33476679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3348835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33496679dcc1SBarry Smith         break;
33506679dcc1SBarry Smith       case 3:
33516679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3352835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33536679dcc1SBarry Smith         break;
33546679dcc1SBarry Smith       case 4:
33556679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3356835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33576679dcc1SBarry Smith         break;
33586679dcc1SBarry Smith       default:
33596679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_N;
3360835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
33616679dcc1SBarry Smith         break;
33626679dcc1SBarry Smith       }
33636679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
33646679dcc1SBarry Smith       break;
33656679dcc1SBarry Smith     }
33666679dcc1SBarry Smith     default:
33676679dcc1SBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_N;
33686679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3369835f2295SStefano Zampini       PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3370a23d5eceSKris Buschelman       break;
3371a23d5eceSKris Buschelman     }
3372a23d5eceSKris Buschelman   }
3373e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
3374a23d5eceSKris Buschelman   b->mbs      = mbs;
3375a23d5eceSKris Buschelman   b->nbs      = nbs;
3376ab93d7beSBarry Smith   if (!skipallocation) {
33772ee49352SLisandro Dalcin     if (!b->imax) {
33789566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
337926fbe8dcSKarl Rupp 
33804fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
33812ee49352SLisandro Dalcin     }
3382ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
338326fbe8dcSKarl Rupp     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3384a23d5eceSKris Buschelman     if (!nnz) {
3385a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3386c62bd62aSJed Brown       else if (nz < 0) nz = 1;
33875d2a9ed1SStefano Zampini       nz = PetscMin(nz, nbs);
3388a23d5eceSKris Buschelman       for (i = 0; i < mbs; i++) b->imax[i] = nz;
33899566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, mbs, &nz));
3390a23d5eceSKris Buschelman     } else {
3391c73702f5SBarry Smith       PetscInt64 nz64 = 0;
33929371c9d4SSatish Balay       for (i = 0; i < mbs; i++) {
33939371c9d4SSatish Balay         b->imax[i] = nnz[i];
33949371c9d4SSatish Balay         nz64 += nnz[i];
33959371c9d4SSatish Balay       }
33969566063dSJacob Faibussowitsch       PetscCall(PetscIntCast(nz64, &nz));
3397a23d5eceSKris Buschelman     }
3398a23d5eceSKris Buschelman 
3399a23d5eceSKris Buschelman     /* allocate the matrix space */
34009566063dSJacob Faibussowitsch     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
34019f0612e4SBarry Smith     PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
34029f0612e4SBarry Smith     PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3403672ba085SHong Zhang     if (B->structure_only) {
34049f0612e4SBarry Smith       b->free_a = PETSC_FALSE;
3405672ba085SHong Zhang     } else {
34066679dcc1SBarry Smith       PetscInt nzbs2 = 0;
34079566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
34089f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
34099f0612e4SBarry Smith       b->free_a = PETSC_TRUE;
34109566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(b->a, nz * bs2));
3411672ba085SHong Zhang     }
3412672ba085SHong Zhang     b->free_ij = PETSC_TRUE;
34139f0612e4SBarry Smith     PetscCall(PetscArrayzero(b->j, nz));
3414672ba085SHong Zhang 
3415a23d5eceSKris Buschelman     b->i[0] = 0;
3416ad540459SPierre Jolivet     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3417e811da20SHong Zhang   } else {
3418e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3419e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3420ab93d7beSBarry Smith   }
3421a23d5eceSKris Buschelman 
3422a23d5eceSKris Buschelman   b->bs2              = bs2;
3423a23d5eceSKris Buschelman   b->mbs              = mbs;
3424a23d5eceSKris Buschelman   b->nz               = 0;
3425b32cb4a7SJed Brown   b->maxnz            = nz;
3426b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3427cb7b82ddSBarry Smith   B->was_assembled    = PETSC_FALSE;
3428cb7b82ddSBarry Smith   B->assembled        = PETSC_FALSE;
34299566063dSJacob Faibussowitsch   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
34303ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3431a23d5eceSKris Buschelman }
3432a23d5eceSKris Buschelman 
343366976f2fSJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3434d71ae5a4SJacob Faibussowitsch {
3435725b52f3SLisandro Dalcin   PetscInt     i, m, nz, nz_max = 0, *nnz;
3436f4259b30SLisandro Dalcin   PetscScalar *values      = NULL;
3437d47bf9aaSJed Brown   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3438725b52f3SLisandro Dalcin 
3439725b52f3SLisandro Dalcin   PetscFunctionBegin;
34405f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
34419566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
34429566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
34439566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
34449566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
34459566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3446d0f46423SBarry Smith   m = B->rmap->n / bs;
3447725b52f3SLisandro Dalcin 
34485f80ce2aSJacob Faibussowitsch   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
34499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &nnz));
3450725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3451cf12db73SBarry Smith     nz = ii[i + 1] - ii[i];
34525f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3453725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3454725b52f3SLisandro Dalcin     nnz[i] = nz;
3455725b52f3SLisandro Dalcin   }
34569566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
34579566063dSJacob Faibussowitsch   PetscCall(PetscFree(nnz));
3458725b52f3SLisandro Dalcin 
3459725b52f3SLisandro Dalcin   values = (PetscScalar *)V;
346048a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3461725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3462cf12db73SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
3463cf12db73SBarry Smith     const PetscInt *icols = jj + ii[i];
3464bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {
3465cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
34669566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
34673adadaf3SJed Brown     } else {
34683adadaf3SJed Brown       PetscInt j;
34693adadaf3SJed Brown       for (j = 0; j < ncols; j++) {
34703adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
34719566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
34723adadaf3SJed Brown       }
34733adadaf3SJed Brown     }
3474725b52f3SLisandro Dalcin   }
34759566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
34769566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
34779566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
34789566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
34793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3480725b52f3SLisandro Dalcin }
3481725b52f3SLisandro Dalcin 
3482cda14afcSprj- /*@C
348311a5261eSBarry Smith   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3484cda14afcSprj- 
3485cda14afcSprj-   Not Collective
3486cda14afcSprj- 
3487cda14afcSprj-   Input Parameter:
3488fe59aa6dSJacob Faibussowitsch . A - a `MATSEQBAIJ` matrix
3489cda14afcSprj- 
3490cda14afcSprj-   Output Parameter:
3491cda14afcSprj- . array - pointer to the data
3492cda14afcSprj- 
3493cda14afcSprj-   Level: intermediate
3494cda14afcSprj- 
34951cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3496cda14afcSprj- @*/
34975d83a8b1SBarry Smith PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3498d71ae5a4SJacob Faibussowitsch {
3499cda14afcSprj-   PetscFunctionBegin;
3500cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
35013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3502cda14afcSprj- }
3503cda14afcSprj- 
3504cda14afcSprj- /*@C
350511a5261eSBarry Smith   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3506cda14afcSprj- 
3507cda14afcSprj-   Not Collective
3508cda14afcSprj- 
3509cda14afcSprj-   Input Parameters:
3510fe59aa6dSJacob Faibussowitsch + A     - a `MATSEQBAIJ` matrix
3511cda14afcSprj- - array - pointer to the data
3512cda14afcSprj- 
3513cda14afcSprj-   Level: intermediate
3514cda14afcSprj- 
35151cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3516cda14afcSprj- @*/
35175d83a8b1SBarry Smith PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3518d71ae5a4SJacob Faibussowitsch {
3519cda14afcSprj-   PetscFunctionBegin;
3520cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
35213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3522cda14afcSprj- }
3523cda14afcSprj- 
35240bad9183SKris Buschelman /*MC
3525fafad747SKris Buschelman    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
35260bad9183SKris Buschelman    block sparse compressed row format.
35270bad9183SKris Buschelman 
35280bad9183SKris Buschelman    Options Database Keys:
352920f4b53cSBarry Smith + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
35306679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
35310bad9183SKris Buschelman 
35320bad9183SKris Buschelman    Level: beginner
35330cd7f59aSBarry Smith 
35340cd7f59aSBarry Smith    Notes:
353511a5261eSBarry Smith    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
353611a5261eSBarry Smith    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
35370bad9183SKris Buschelman 
35382ef1f0ffSBarry Smith    Run with `-info` to see what version of the matrix-vector product is being used
35396679dcc1SBarry Smith 
35401cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
35410bad9183SKris Buschelman M*/
35420bad9183SKris Buschelman 
3543cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3544b24902e0SBarry Smith 
3545d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3546d71ae5a4SJacob Faibussowitsch {
3547c1ac3661SBarry Smith   PetscMPIInt  size;
3548b6490206SBarry Smith   Mat_SeqBAIJ *b;
35493b2fbd54SBarry Smith 
35503a40ed3dSBarry Smith   PetscFunctionBegin;
35519566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
35525f80ce2aSJacob Faibussowitsch   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3553b6490206SBarry Smith 
35544dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
3555b0a32e0cSBarry Smith   B->data   = (void *)b;
3556aea10558SJacob Faibussowitsch   B->ops[0] = MatOps_Values;
355726fbe8dcSKarl Rupp 
3558f4259b30SLisandro Dalcin   b->row          = NULL;
3559f4259b30SLisandro Dalcin   b->col          = NULL;
3560f4259b30SLisandro Dalcin   b->icol         = NULL;
35612593348eSBarry Smith   b->reallocs     = 0;
3562f4259b30SLisandro Dalcin   b->saved_values = NULL;
35632593348eSBarry Smith 
3564c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
35652593348eSBarry Smith   b->nonew              = 0;
3566f4259b30SLisandro Dalcin   b->diag               = NULL;
3567f4259b30SLisandro Dalcin   B->spptr              = NULL;
3568b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3569a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
35704e220ebcSLois Curfman McInnes 
35719566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
35729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
35739566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
35749566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
35759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
35769566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
35779566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
35789566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
35799566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
35809566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
35817ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
35829566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
35837ea3e4caSstefano_zampini #endif
35849566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
35859566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
35863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
35872593348eSBarry Smith }
35882593348eSBarry Smith 
3589d6acfc2dSPierre Jolivet PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3590d71ae5a4SJacob Faibussowitsch {
3591b24902e0SBarry Smith   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3592a96a251dSBarry Smith   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3593de6a44a3SBarry Smith 
35943a40ed3dSBarry Smith   PetscFunctionBegin;
359531fe6a7dSBarry Smith   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
35965f80ce2aSJacob Faibussowitsch   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
35972593348eSBarry Smith 
35984fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35994fd072dbSBarry Smith     c->imax           = a->imax;
36004fd072dbSBarry Smith     c->ilen           = a->ilen;
36014fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
36024fd072dbSBarry Smith   } else {
36039566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3604b6490206SBarry Smith     for (i = 0; i < mbs; i++) {
36052593348eSBarry Smith       c->imax[i] = a->imax[i];
36062593348eSBarry Smith       c->ilen[i] = a->ilen[i];
36072593348eSBarry Smith     }
36084fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
36094fd072dbSBarry Smith   }
36102593348eSBarry Smith 
36112593348eSBarry Smith   /* allocate the matrix space */
361216a2bf60SHong Zhang   if (mallocmatspace) {
36134fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
36149f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
36159f0612e4SBarry Smith       PetscCall(PetscArrayzero(c->a, bs2 * nz));
36169f0612e4SBarry Smith       c->free_a       = PETSC_TRUE;
36174fd072dbSBarry Smith       c->i            = a->i;
36184fd072dbSBarry Smith       c->j            = a->j;
3619379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
36204fd072dbSBarry Smith       c->parent       = A;
36211e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
36221e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
362326fbe8dcSKarl Rupp 
36249566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)A));
36259566063dSJacob Faibussowitsch       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
36269566063dSJacob Faibussowitsch       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
36274fd072dbSBarry Smith     } else {
36289f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
36299f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
36309f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3631379be0ddSLisandro Dalcin       c->free_a  = PETSC_TRUE;
36324fd072dbSBarry Smith       c->free_ij = PETSC_TRUE;
363326fbe8dcSKarl Rupp 
36349566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3635b6490206SBarry Smith       if (mbs > 0) {
36369566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(c->j, a->j, nz));
36372e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
36389566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
36392e8a6d31SBarry Smith         } else {
36409566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(c->a, bs2 * nz));
36412593348eSBarry Smith         }
36422593348eSBarry Smith       }
36431e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
36441e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
364516a2bf60SHong Zhang     }
36464fd072dbSBarry Smith   }
364716a2bf60SHong Zhang 
36482593348eSBarry Smith   c->roworiented = a->roworiented;
36492593348eSBarry Smith   c->nonew       = a->nonew;
365026fbe8dcSKarl Rupp 
36519566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
36529566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
365326fbe8dcSKarl Rupp 
36545c9eb25fSBarry Smith   c->bs2 = a->bs2;
36555c9eb25fSBarry Smith   c->mbs = a->mbs;
36565c9eb25fSBarry Smith   c->nbs = a->nbs;
36572593348eSBarry Smith 
36582593348eSBarry Smith   if (a->diag) {
36594fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
36604fd072dbSBarry Smith       c->diag      = a->diag;
36614fd072dbSBarry Smith       c->free_diag = PETSC_FALSE;
36624fd072dbSBarry Smith     } else {
36639566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
366426fbe8dcSKarl Rupp       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
36654fd072dbSBarry Smith       c->free_diag = PETSC_TRUE;
36664fd072dbSBarry Smith     }
3667f4259b30SLisandro Dalcin   } else c->diag = NULL;
366826fbe8dcSKarl Rupp 
36692593348eSBarry Smith   c->nz         = a->nz;
3670f2cbd3d5SJed Brown   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3671f361c04dSBarry Smith   c->solve_work = NULL;
3672f361c04dSBarry Smith   c->mult_work  = NULL;
3673f361c04dSBarry Smith   c->sor_workt  = NULL;
3674f361c04dSBarry Smith   c->sor_work   = NULL;
367588e51ccdSHong Zhang 
367688e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
367788e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3678cd6b891eSBarry Smith   if (a->compressedrow.use) {
367988e51ccdSHong Zhang     i = a->compressedrow.nrows;
36809566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
36819566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
36829566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
368388e51ccdSHong Zhang   } else {
368488e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
36850298fd71SBarry Smith     c->compressedrow.i      = NULL;
36860298fd71SBarry Smith     c->compressedrow.rindex = NULL;
368788e51ccdSHong Zhang   }
3688c05f355bSMark Adams   c->nonzerorowcnt = a->nonzerorowcnt;
3689e56f5c9eSBarry Smith   C->nonzerostate  = A->nonzerostate;
369026fbe8dcSKarl Rupp 
36919566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
36923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
36932593348eSBarry Smith }
36942593348eSBarry Smith 
3695d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3696d71ae5a4SJacob Faibussowitsch {
3697b24902e0SBarry Smith   PetscFunctionBegin;
36989566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
36999566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
37009566063dSJacob Faibussowitsch   PetscCall(MatSetType(*B, MATSEQBAIJ));
37019566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
37023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3703b24902e0SBarry Smith }
3704b24902e0SBarry Smith 
3705618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3706d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3707d71ae5a4SJacob Faibussowitsch {
3708b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3709b51a4376SLisandro Dalcin   PetscInt    *rowidxs, *colidxs;
3710b51a4376SLisandro Dalcin   PetscScalar *matvals;
3711b51a4376SLisandro Dalcin 
3712b51a4376SLisandro Dalcin   PetscFunctionBegin;
37139566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
3714b51a4376SLisandro Dalcin 
3715b51a4376SLisandro Dalcin   /* read matrix header */
37169566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
37175f80ce2aSJacob Faibussowitsch   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
37189371c9d4SSatish Balay   M  = header[1];
37199371c9d4SSatish Balay   N  = header[2];
37209371c9d4SSatish Balay   nz = header[3];
37215f80ce2aSJacob Faibussowitsch   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
37225f80ce2aSJacob Faibussowitsch   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
37235f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3724b51a4376SLisandro Dalcin 
3725b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
37269566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3727b51a4376SLisandro Dalcin   /* set local and global sizes if not set already */
3728b51a4376SLisandro Dalcin   if (mat->rmap->n < 0) mat->rmap->n = M;
3729b51a4376SLisandro Dalcin   if (mat->cmap->n < 0) mat->cmap->n = N;
3730b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3731b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
37329566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
37339566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
3734b51a4376SLisandro Dalcin 
3735b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
37369566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &rows, &cols));
37375f80ce2aSJacob Faibussowitsch   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
37389566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
37399566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
37409371c9d4SSatish Balay   mbs = m / bs;
37419371c9d4SSatish Balay   nbs = n / bs;
3742b51a4376SLisandro Dalcin 
3743b51a4376SLisandro Dalcin   /* read in row lengths, column indices and nonzero values */
37449566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &rowidxs));
37459566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
37469371c9d4SSatish Balay   rowidxs[0] = 0;
37479371c9d4SSatish Balay   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3748b51a4376SLisandro Dalcin   sum = rowidxs[m];
37495f80ce2aSJacob Faibussowitsch   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3750b51a4376SLisandro Dalcin 
3751b51a4376SLisandro Dalcin   /* read in column indices and nonzero values */
37529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
37539566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
37549566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3755b51a4376SLisandro Dalcin 
3756b51a4376SLisandro Dalcin   {               /* preallocate matrix storage */
3757b51a4376SLisandro Dalcin     PetscBT   bt; /* helper bit set to count nonzeros */
3758b51a4376SLisandro Dalcin     PetscInt *nnz;
3759618cc2edSLisandro Dalcin     PetscBool sbaij;
3760b51a4376SLisandro Dalcin 
37619566063dSJacob Faibussowitsch     PetscCall(PetscBTCreate(nbs, &bt));
37629566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(mbs, &nnz));
37639566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3764b51a4376SLisandro Dalcin     for (i = 0; i < mbs; i++) {
37659566063dSJacob Faibussowitsch       PetscCall(PetscBTMemzero(nbs, bt));
3766618cc2edSLisandro Dalcin       for (k = 0; k < bs; k++) {
3767618cc2edSLisandro Dalcin         PetscInt row = bs * i + k;
3768618cc2edSLisandro Dalcin         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3769618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3770618cc2edSLisandro Dalcin           if (!sbaij || col >= row)
3771618cc2edSLisandro Dalcin             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3772618cc2edSLisandro Dalcin         }
3773618cc2edSLisandro Dalcin       }
3774b51a4376SLisandro Dalcin     }
37759566063dSJacob Faibussowitsch     PetscCall(PetscBTDestroy(&bt));
37769566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
37779566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
37789566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
3779b51a4376SLisandro Dalcin   }
3780b51a4376SLisandro Dalcin 
3781b51a4376SLisandro Dalcin   /* store matrix values */
3782b51a4376SLisandro Dalcin   for (i = 0; i < m; i++) {
3783b51a4376SLisandro Dalcin     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
37849927e4dfSBarry Smith     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3785b51a4376SLisandro Dalcin   }
3786b51a4376SLisandro Dalcin 
37879566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
37889566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs, matvals));
37899566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
37909566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
37913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3792b51a4376SLisandro Dalcin }
3793b51a4376SLisandro Dalcin 
3794d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3795d71ae5a4SJacob Faibussowitsch {
37967f489da9SVaclav Hapla   PetscBool isbinary;
3797f501eaabSShri Abhyankar 
3798f501eaabSShri Abhyankar   PetscFunctionBegin;
37999566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
38005f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
38019566063dSJacob Faibussowitsch   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
38023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3803f501eaabSShri Abhyankar }
3804f501eaabSShri Abhyankar 
38055d83a8b1SBarry Smith /*@
380611a5261eSBarry Smith   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3807273d9f13SBarry Smith   compressed row) format.  For good matrix assembly performance the
380820f4b53cSBarry Smith   user should preallocate the matrix storage by setting the parameter `nz`
380920f4b53cSBarry Smith   (or the array `nnz`).
38102593348eSBarry Smith 
3811d083f849SBarry Smith   Collective
3812273d9f13SBarry Smith 
3813273d9f13SBarry Smith   Input Parameters:
381411a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
381511a5261eSBarry Smith . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
381611a5261eSBarry Smith          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3817273d9f13SBarry Smith . m    - number of rows
3818273d9f13SBarry Smith . n    - number of columns
381935d8aa7fSBarry Smith . nz   - number of nonzero blocks  per block row (same for all rows)
382035d8aa7fSBarry Smith - nnz  - array containing the number of nonzero blocks in the various block rows
382120f4b53cSBarry Smith          (possibly different for each block row) or `NULL`
3822273d9f13SBarry Smith 
3823273d9f13SBarry Smith   Output Parameter:
3824273d9f13SBarry Smith . A - the matrix
3825273d9f13SBarry Smith 
3826273d9f13SBarry Smith   Options Database Keys:
382711a5261eSBarry Smith + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3828a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
3829273d9f13SBarry Smith 
3830273d9f13SBarry Smith   Level: intermediate
3831273d9f13SBarry Smith 
3832273d9f13SBarry Smith   Notes:
383377433607SBarry Smith   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
38342ef1f0ffSBarry Smith   MatXXXXSetPreallocation() paradigm instead of this routine directly.
38352ef1f0ffSBarry Smith   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
38362ef1f0ffSBarry Smith 
3837d1be2dadSMatthew Knepley   The number of rows and columns must be divisible by blocksize.
3838d1be2dadSMatthew Knepley 
38392ef1f0ffSBarry Smith   If the `nnz` parameter is given then the `nz` parameter is ignored
384049a6f317SBarry Smith 
384135d8aa7fSBarry Smith   A nonzero block is any block that as 1 or more nonzeros in it
384235d8aa7fSBarry Smith 
38432ef1f0ffSBarry Smith   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3844273d9f13SBarry Smith   storage.  That is, the stored row and column indices can begin at
384520f4b53cSBarry Smith   either one (as in Fortran) or zero.
3846273d9f13SBarry Smith 
38472ef1f0ffSBarry Smith   Specify the preallocated storage with either `nz` or `nnz` (not both).
38482ef1f0ffSBarry Smith   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3849651615e1SBarry Smith   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3850273d9f13SBarry Smith   matrices.
3851273d9f13SBarry Smith 
38521cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3853273d9f13SBarry Smith @*/
3854d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3855d71ae5a4SJacob Faibussowitsch {
3856273d9f13SBarry Smith   PetscFunctionBegin;
38579566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
38589566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
38599566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJ));
38609566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
38613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3862273d9f13SBarry Smith }
3863273d9f13SBarry Smith 
38645d83a8b1SBarry Smith /*@
3865273d9f13SBarry Smith   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3866273d9f13SBarry Smith   per row in the matrix. For good matrix assembly performance the
386720f4b53cSBarry Smith   user should preallocate the matrix storage by setting the parameter `nz`
386820f4b53cSBarry Smith   (or the array `nnz`).
3869273d9f13SBarry Smith 
3870d083f849SBarry Smith   Collective
3871273d9f13SBarry Smith 
3872273d9f13SBarry Smith   Input Parameters:
38731c4f3114SJed Brown + B   - the matrix
387411a5261eSBarry Smith . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
387511a5261eSBarry Smith         blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3876273d9f13SBarry Smith . nz  - number of block nonzeros per block row (same for all rows)
3877273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows
38782ef1f0ffSBarry Smith         (possibly different for each block row) or `NULL`
3879273d9f13SBarry Smith 
3880273d9f13SBarry Smith   Options Database Keys:
388111a5261eSBarry Smith + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3882a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
3883273d9f13SBarry Smith 
3884273d9f13SBarry Smith   Level: intermediate
3885273d9f13SBarry Smith 
3886273d9f13SBarry Smith   Notes:
38872ef1f0ffSBarry Smith   If the `nnz` parameter is given then the `nz` parameter is ignored
388849a6f317SBarry Smith 
388911a5261eSBarry Smith   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3890aa95bbe8SBarry Smith   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
389120f4b53cSBarry Smith   You can also run with the option `-info` and look for messages with the string
3892aa95bbe8SBarry Smith   malloc in them to see if additional memory allocation was needed.
3893aa95bbe8SBarry Smith 
38942ef1f0ffSBarry Smith   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3895273d9f13SBarry Smith   storage.  That is, the stored row and column indices can begin at
389620f4b53cSBarry Smith   either one (as in Fortran) or zero.
3897273d9f13SBarry Smith 
3898d8a51d2aSBarry Smith   Specify the preallocated storage with either `nz` or `nnz` (not both).
38992ef1f0ffSBarry Smith   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3900651615e1SBarry Smith   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3901273d9f13SBarry Smith 
39021cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3903273d9f13SBarry Smith @*/
3904d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3905d71ae5a4SJacob Faibussowitsch {
3906273d9f13SBarry Smith   PetscFunctionBegin;
39076ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
39086ba663aaSJed Brown   PetscValidType(B, 1);
39096ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3910cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
39113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3912273d9f13SBarry Smith }
3913a1d92eedSBarry Smith 
3914725b52f3SLisandro Dalcin /*@C
391511a5261eSBarry Smith   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3916725b52f3SLisandro Dalcin 
3917d083f849SBarry Smith   Collective
3918725b52f3SLisandro Dalcin 
3919725b52f3SLisandro Dalcin   Input Parameters:
39201c4f3114SJed Brown + B  - the matrix
392120f4b53cSBarry Smith . bs - the blocksize
3922d8a51d2aSBarry Smith . i  - the indices into `j` for the start of each local row (indices start with zero)
3923d8a51d2aSBarry Smith . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
3924d8a51d2aSBarry Smith - v  - optional values in the matrix, use `NULL` if not provided
3925725b52f3SLisandro Dalcin 
3926664954b6SBarry Smith   Level: advanced
3927725b52f3SLisandro Dalcin 
39283adadaf3SJed Brown   Notes:
3929d8a51d2aSBarry Smith   The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3930d8a51d2aSBarry Smith 
393111a5261eSBarry Smith   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
393211a5261eSBarry Smith   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
39333adadaf3SJed Brown   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
393411a5261eSBarry Smith   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
39353adadaf3SJed Brown   block column and the second index is over columns within a block.
39363adadaf3SJed Brown 
3937664954b6SBarry Smith   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3938664954b6SBarry Smith 
39391cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3940725b52f3SLisandro Dalcin @*/
3941d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3942d71ae5a4SJacob Faibussowitsch {
3943725b52f3SLisandro Dalcin   PetscFunctionBegin;
39446ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
39456ba663aaSJed Brown   PetscValidType(B, 1);
39466ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3947cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
39483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3949725b52f3SLisandro Dalcin }
3950725b52f3SLisandro Dalcin 
3951c75a6043SHong Zhang /*@
395211a5261eSBarry Smith   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3953c75a6043SHong Zhang 
3954d083f849SBarry Smith   Collective
3955c75a6043SHong Zhang 
3956c75a6043SHong Zhang   Input Parameters:
3957c75a6043SHong Zhang + comm - must be an MPI communicator of size 1
3958c75a6043SHong Zhang . bs   - size of block
3959c75a6043SHong Zhang . m    - number of rows
3960c75a6043SHong Zhang . n    - number of columns
3961483a2f95SBarry Smith . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3962c75a6043SHong Zhang . j    - column indices
3963c75a6043SHong Zhang - a    - matrix values
3964c75a6043SHong Zhang 
3965c75a6043SHong Zhang   Output Parameter:
3966c75a6043SHong Zhang . mat - the matrix
3967c75a6043SHong Zhang 
3968dfb205c3SBarry Smith   Level: advanced
3969c75a6043SHong Zhang 
3970c75a6043SHong Zhang   Notes:
39712ef1f0ffSBarry Smith   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3972c75a6043SHong Zhang   once the matrix is destroyed
3973c75a6043SHong Zhang 
3974c75a6043SHong Zhang   You cannot set new nonzero locations into this matrix, that will generate an error.
3975c75a6043SHong Zhang 
39762ef1f0ffSBarry Smith   The `i` and `j` indices are 0 based
3977c75a6043SHong Zhang 
397811a5261eSBarry Smith   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3979dfb205c3SBarry Smith 
39803adadaf3SJed Brown   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
39813adadaf3SJed Brown   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
39823adadaf3SJed Brown   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
39833adadaf3SJed Brown   with column-major ordering within blocks.
3984dfb205c3SBarry Smith 
39851cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3986c75a6043SHong Zhang @*/
3987d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3988d71ae5a4SJacob Faibussowitsch {
3989c75a6043SHong Zhang   Mat_SeqBAIJ *baij;
3990c75a6043SHong Zhang 
3991c75a6043SHong Zhang   PetscFunctionBegin;
39925f80ce2aSJacob Faibussowitsch   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
39935f80ce2aSJacob Faibussowitsch   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3994c75a6043SHong Zhang 
39959566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
39969566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, m, n));
39979566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATSEQBAIJ));
39989566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3999c75a6043SHong Zhang   baij = (Mat_SeqBAIJ *)(*mat)->data;
40009566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
4001c75a6043SHong Zhang 
4002c75a6043SHong Zhang   baij->i = i;
4003c75a6043SHong Zhang   baij->j = j;
4004c75a6043SHong Zhang   baij->a = a;
400526fbe8dcSKarl Rupp 
4006c75a6043SHong Zhang   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
4007e6b907acSBarry Smith   baij->free_a         = PETSC_FALSE;
4008e6b907acSBarry Smith   baij->free_ij        = PETSC_FALSE;
4009ceb5bf51SJacob Faibussowitsch   baij->free_imax_ilen = PETSC_TRUE;
4010c75a6043SHong Zhang 
4011ceb5bf51SJacob Faibussowitsch   for (PetscInt ii = 0; ii < m; ii++) {
4012ceb5bf51SJacob Faibussowitsch     const PetscInt row_len = i[ii + 1] - i[ii];
4013ceb5bf51SJacob Faibussowitsch 
4014ceb5bf51SJacob Faibussowitsch     baij->ilen[ii] = baij->imax[ii] = row_len;
4015ceb5bf51SJacob Faibussowitsch     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
4016c75a6043SHong Zhang   }
401776bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
4018ceb5bf51SJacob Faibussowitsch     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
40196bdcaf15SBarry Smith       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
40206bdcaf15SBarry Smith       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
4021c75a6043SHong Zhang     }
402276bd3646SJed Brown   }
4023c75a6043SHong Zhang 
40249566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
40259566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
40263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4027c75a6043SHong Zhang }
4028bdf6f3fcSHong Zhang 
4029d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4030d71ae5a4SJacob Faibussowitsch {
4031bdf6f3fcSHong Zhang   PetscFunctionBegin;
40329566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
40333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4034bdf6f3fcSHong Zhang }
4035