xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f9663b93ddca5ef47eb7ea337a18d9144e6b1e51)
1be1d678aSKris Buschelman 
22593348eSBarry Smith /*
3b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
42593348eSBarry Smith   matrix storage format.
52593348eSBarry Smith */
6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
7c6db04a5SJed Brown #include <petscblaslapack.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
1043516a2dSKris Buschelman 
1126cec326SBarry Smith /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
1226cec326SBarry Smith #define TYPE BAIJ
1326cec326SBarry Smith #define TYPE_BS
1426cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
1526cec326SBarry Smith #undef TYPE_BS
1626cec326SBarry Smith #define TYPE_BS _BS
1726cec326SBarry Smith #define TYPE_BS_ON
1826cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
1926cec326SBarry Smith #undef TYPE_BS
2026cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmat.h"
2126cec326SBarry Smith #undef TYPE
2226cec326SBarry Smith #undef TYPE_BS_ON
2326cec326SBarry Smith 
247ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
257ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
267ea3e4caSstefano_zampini #endif
277ea3e4caSstefano_zampini 
28b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
29fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
30b5b72c8aSIrina Sokolova #endif
31c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
32b5b72c8aSIrina Sokolova 
33ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
34d71ae5a4SJacob Faibussowitsch {
359463ebdaSPierre Jolivet   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
36ff6a9541SJacob Faibussowitsch   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
379463ebdaSPierre Jolivet   MatScalar   *a_val = a_aij->a;
389463ebdaSPierre Jolivet 
399463ebdaSPierre Jolivet   PetscFunctionBegin;
409566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
41ff6a9541SJacob Faibussowitsch   PetscCall(PetscArrayzero(reductions, n));
429463ebdaSPierre Jolivet   if (type == NORM_2) {
43ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
449463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
459463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
46857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
479463ebdaSPierre Jolivet           a_val++;
489463ebdaSPierre Jolivet         }
499463ebdaSPierre Jolivet       }
509463ebdaSPierre Jolivet     }
519463ebdaSPierre Jolivet   } else if (type == NORM_1) {
52ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
539463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
549463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
55857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
569463ebdaSPierre Jolivet           a_val++;
579463ebdaSPierre Jolivet         }
589463ebdaSPierre Jolivet       }
599463ebdaSPierre Jolivet     }
609463ebdaSPierre Jolivet   } else if (type == NORM_INFINITY) {
61ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
629463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
639463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
649463ebdaSPierre Jolivet           int col         = A->cmap->rstart + a_aij->j[i] * bs + jb;
65857cbf51SRichard Tran Mills           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
669463ebdaSPierre Jolivet           a_val++;
679463ebdaSPierre Jolivet         }
689463ebdaSPierre Jolivet       }
699463ebdaSPierre Jolivet     }
70857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
71ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
72857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
73857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
74857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
75857cbf51SRichard Tran Mills           a_val++;
76857cbf51SRichard Tran Mills         }
77857cbf51SRichard Tran Mills       }
78857cbf51SRichard Tran Mills     }
79857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
80ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
81857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
82857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
83857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
84857cbf51SRichard Tran Mills           a_val++;
85857cbf51SRichard Tran Mills         }
86857cbf51SRichard Tran Mills       }
87857cbf51SRichard Tran Mills     }
88857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
899463ebdaSPierre Jolivet   if (type == NORM_2) {
90ff6a9541SJacob Faibussowitsch     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
91857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
92ff6a9541SJacob Faibussowitsch     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
939463ebdaSPierre Jolivet   }
943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
959463ebdaSPierre Jolivet }
969463ebdaSPierre Jolivet 
9766976f2fSJacob Faibussowitsch static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
98d71ae5a4SJacob Faibussowitsch {
99b01c7715SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
100de80f912SBarry Smith   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
1017f0c90edSBarry Smith   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
10262bba022SBarry Smith   PetscReal    shift = 0.0;
1031a9391e3SHong Zhang   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
104b01c7715SBarry Smith 
105b01c7715SBarry Smith   PetscFunctionBegin;
106a455e926SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
107a455e926SHong Zhang 
1089797317bSBarry Smith   if (a->idiagvalid) {
1099797317bSBarry Smith     if (values) *values = a->idiag;
1103ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1119797317bSBarry Smith   }
1129566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
113b01c7715SBarry Smith   diag_offset = a->diag;
1144dfa11a4SJacob Faibussowitsch   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
115b01c7715SBarry Smith   diag = a->idiag;
116bbead8a2SBarry Smith   if (values) *values = a->idiag;
117b01c7715SBarry Smith   /* factor and invert each block */
118521d7252SBarry Smith   switch (bs) {
119ab040260SJed Brown   case 1:
120ab040260SJed Brown     for (i = 0; i < mbs; i++) {
121ab040260SJed Brown       odiag   = v + 1 * diag_offset[i];
122ab040260SJed Brown       diag[0] = odiag[0];
123ec1892c8SHong Zhang 
124ec1892c8SHong Zhang       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
125ec1892c8SHong Zhang         if (allowzeropivot) {
1267b6c816cSBarry Smith           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1277b6c816cSBarry Smith           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
1287b6c816cSBarry Smith           A->factorerror_zeropivot_row   = i;
1299566063dSJacob Faibussowitsch           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
13098921bdaSJacob Faibussowitsch         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
131ec1892c8SHong Zhang       }
132ec1892c8SHong Zhang 
133d4a378daSJed Brown       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134ab040260SJed Brown       diag += 1;
135ab040260SJed Brown     }
136ab040260SJed Brown     break;
137b01c7715SBarry Smith   case 2:
138b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
139b01c7715SBarry Smith       odiag   = v + 4 * diag_offset[i];
1409371c9d4SSatish Balay       diag[0] = odiag[0];
1419371c9d4SSatish Balay       diag[1] = odiag[1];
1429371c9d4SSatish Balay       diag[2] = odiag[2];
1439371c9d4SSatish Balay       diag[3] = odiag[3];
1449566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1457b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146b01c7715SBarry Smith       diag += 4;
147b01c7715SBarry Smith     }
148b01c7715SBarry Smith     break;
149b01c7715SBarry Smith   case 3:
150b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
151b01c7715SBarry Smith       odiag   = v + 9 * diag_offset[i];
1529371c9d4SSatish Balay       diag[0] = odiag[0];
1539371c9d4SSatish Balay       diag[1] = odiag[1];
1549371c9d4SSatish Balay       diag[2] = odiag[2];
1559371c9d4SSatish Balay       diag[3] = odiag[3];
1569371c9d4SSatish Balay       diag[4] = odiag[4];
1579371c9d4SSatish Balay       diag[5] = odiag[5];
1589371c9d4SSatish Balay       diag[6] = odiag[6];
1599371c9d4SSatish Balay       diag[7] = odiag[7];
160b01c7715SBarry Smith       diag[8] = odiag[8];
1619566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
1627b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163b01c7715SBarry Smith       diag += 9;
164b01c7715SBarry Smith     }
165b01c7715SBarry Smith     break;
166b01c7715SBarry Smith   case 4:
167b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
168b01c7715SBarry Smith       odiag = v + 16 * diag_offset[i];
1699566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 16));
1709566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
1717b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172b01c7715SBarry Smith       diag += 16;
173b01c7715SBarry Smith     }
174b01c7715SBarry Smith     break;
175b01c7715SBarry Smith   case 5:
176b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
177b01c7715SBarry Smith       odiag = v + 25 * diag_offset[i];
1789566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 25));
1799566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
1807b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181b01c7715SBarry Smith       diag += 25;
182b01c7715SBarry Smith     }
183b01c7715SBarry Smith     break;
184d49b2adcSBarry Smith   case 6:
185d49b2adcSBarry Smith     for (i = 0; i < mbs; i++) {
186d49b2adcSBarry Smith       odiag = v + 36 * diag_offset[i];
1879566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 36));
1889566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
1897b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190d49b2adcSBarry Smith       diag += 36;
191d49b2adcSBarry Smith     }
192d49b2adcSBarry Smith     break;
193de80f912SBarry Smith   case 7:
194de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
195de80f912SBarry Smith       odiag = v + 49 * diag_offset[i];
1969566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 49));
1979566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
1987b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199de80f912SBarry Smith       diag += 49;
200de80f912SBarry Smith     }
201de80f912SBarry Smith     break;
202b01c7715SBarry Smith   default:
2039566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
205de80f912SBarry Smith       odiag = v + bs2 * diag_offset[i];
2069566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, bs2));
2079566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
2087b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209de80f912SBarry Smith       diag += bs2;
210de80f912SBarry Smith     }
2119566063dSJacob Faibussowitsch     PetscCall(PetscFree2(v_work, v_pivots));
212b01c7715SBarry Smith   }
213b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
2143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
215b01c7715SBarry Smith }
216b01c7715SBarry Smith 
21766976f2fSJacob Faibussowitsch static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218d71ae5a4SJacob Faibussowitsch {
2196d3beeddSMatthew Knepley   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
220e48d15efSToby Isaac   PetscScalar       *x, *work, *w, *workt, *t;
221e48d15efSToby Isaac   const MatScalar   *v, *aa = a->a, *idiag;
222e48d15efSToby Isaac   const PetscScalar *b, *xb;
2235455b99fSToby Isaac   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224e48d15efSToby Isaac   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225c1ac3661SBarry Smith   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
226b01c7715SBarry Smith 
227b01c7715SBarry Smith   PetscFunctionBegin;
228b01c7715SBarry Smith   its = its * lits;
2295f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
2305f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2315f80ce2aSJacob Faibussowitsch   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
2325f80ce2aSJacob Faibussowitsch   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
2335f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
234b01c7715SBarry Smith 
2359566063dSJacob Faibussowitsch   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
236b01c7715SBarry Smith 
2373ba16761SJacob Faibussowitsch   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238b01c7715SBarry Smith   diag  = a->diag;
239b01c7715SBarry Smith   idiag = a->idiag;
240de80f912SBarry Smith   k     = PetscMax(A->rmap->n, A->cmap->n);
24148a46eb9SPierre Jolivet   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
24248a46eb9SPierre Jolivet   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
24348a46eb9SPierre Jolivet   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
2443475c22fSBarry Smith   work = a->mult_work;
2453475c22fSBarry Smith   t    = a->sor_workt;
246de80f912SBarry Smith   w    = a->sor_work;
247de80f912SBarry Smith 
2489566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
2499566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
250de80f912SBarry Smith 
251de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
252de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253e48d15efSToby Isaac       switch (bs) {
254e48d15efSToby Isaac       case 1:
255e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256e48d15efSToby Isaac         t[0] = b[0];
257e48d15efSToby Isaac         i2   = 1;
258e48d15efSToby Isaac         idiag += 1;
259e48d15efSToby Isaac         for (i = 1; i < m; i++) {
260e48d15efSToby Isaac           v    = aa + ai[i];
261e48d15efSToby Isaac           vi   = aj + ai[i];
262e48d15efSToby Isaac           nz   = diag[i] - ai[i];
263e48d15efSToby Isaac           s[0] = b[i2];
264e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
265e48d15efSToby Isaac             xw[0] = x[vi[j]];
266e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267e48d15efSToby Isaac           }
268e48d15efSToby Isaac           t[i2] = s[0];
269e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270e48d15efSToby Isaac           x[i2] = xw[0];
271e48d15efSToby Isaac           idiag += 1;
272e48d15efSToby Isaac           i2 += 1;
273e48d15efSToby Isaac         }
274e48d15efSToby Isaac         break;
275e48d15efSToby Isaac       case 2:
276e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
2779371c9d4SSatish Balay         t[0] = b[0];
2789371c9d4SSatish Balay         t[1] = b[1];
279e48d15efSToby Isaac         i2   = 2;
280e48d15efSToby Isaac         idiag += 4;
281e48d15efSToby Isaac         for (i = 1; i < m; i++) {
282e48d15efSToby Isaac           v    = aa + 4 * ai[i];
283e48d15efSToby Isaac           vi   = aj + ai[i];
284e48d15efSToby Isaac           nz   = diag[i] - ai[i];
2859371c9d4SSatish Balay           s[0] = b[i2];
2869371c9d4SSatish Balay           s[1] = b[i2 + 1];
287e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
288e48d15efSToby Isaac             idx   = 2 * vi[j];
289e48d15efSToby Isaac             it    = 4 * j;
2909371c9d4SSatish Balay             xw[0] = x[idx];
2919371c9d4SSatish Balay             xw[1] = x[1 + idx];
292e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293e48d15efSToby Isaac           }
2949371c9d4SSatish Balay           t[i2]     = s[0];
2959371c9d4SSatish Balay           t[i2 + 1] = s[1];
296e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
2979371c9d4SSatish Balay           x[i2]     = xw[0];
2989371c9d4SSatish Balay           x[i2 + 1] = xw[1];
299e48d15efSToby Isaac           idiag += 4;
300e48d15efSToby Isaac           i2 += 2;
301e48d15efSToby Isaac         }
302e48d15efSToby Isaac         break;
303e48d15efSToby Isaac       case 3:
304e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
3059371c9d4SSatish Balay         t[0] = b[0];
3069371c9d4SSatish Balay         t[1] = b[1];
3079371c9d4SSatish Balay         t[2] = b[2];
308e48d15efSToby Isaac         i2   = 3;
309e48d15efSToby Isaac         idiag += 9;
310e48d15efSToby Isaac         for (i = 1; i < m; i++) {
311e48d15efSToby Isaac           v    = aa + 9 * ai[i];
312e48d15efSToby Isaac           vi   = aj + ai[i];
313e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3149371c9d4SSatish Balay           s[0] = b[i2];
3159371c9d4SSatish Balay           s[1] = b[i2 + 1];
3169371c9d4SSatish Balay           s[2] = b[i2 + 2];
317e48d15efSToby Isaac           while (nz--) {
318e48d15efSToby Isaac             idx   = 3 * (*vi++);
3199371c9d4SSatish Balay             xw[0] = x[idx];
3209371c9d4SSatish Balay             xw[1] = x[1 + idx];
3219371c9d4SSatish Balay             xw[2] = x[2 + idx];
322e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323e48d15efSToby Isaac             v += 9;
324e48d15efSToby Isaac           }
3259371c9d4SSatish Balay           t[i2]     = s[0];
3269371c9d4SSatish Balay           t[i2 + 1] = s[1];
3279371c9d4SSatish Balay           t[i2 + 2] = s[2];
328e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
3299371c9d4SSatish Balay           x[i2]     = xw[0];
3309371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3319371c9d4SSatish Balay           x[i2 + 2] = xw[2];
332e48d15efSToby Isaac           idiag += 9;
333e48d15efSToby Isaac           i2 += 3;
334e48d15efSToby Isaac         }
335e48d15efSToby Isaac         break;
336e48d15efSToby Isaac       case 4:
337e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
3389371c9d4SSatish Balay         t[0] = b[0];
3399371c9d4SSatish Balay         t[1] = b[1];
3409371c9d4SSatish Balay         t[2] = b[2];
3419371c9d4SSatish Balay         t[3] = b[3];
342e48d15efSToby Isaac         i2   = 4;
343e48d15efSToby Isaac         idiag += 16;
344e48d15efSToby Isaac         for (i = 1; i < m; i++) {
345e48d15efSToby Isaac           v    = aa + 16 * ai[i];
346e48d15efSToby Isaac           vi   = aj + ai[i];
347e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3489371c9d4SSatish Balay           s[0] = b[i2];
3499371c9d4SSatish Balay           s[1] = b[i2 + 1];
3509371c9d4SSatish Balay           s[2] = b[i2 + 2];
3519371c9d4SSatish Balay           s[3] = b[i2 + 3];
352e48d15efSToby Isaac           while (nz--) {
353e48d15efSToby Isaac             idx   = 4 * (*vi++);
3549371c9d4SSatish Balay             xw[0] = x[idx];
3559371c9d4SSatish Balay             xw[1] = x[1 + idx];
3569371c9d4SSatish Balay             xw[2] = x[2 + idx];
3579371c9d4SSatish Balay             xw[3] = x[3 + idx];
358e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359e48d15efSToby Isaac             v += 16;
360e48d15efSToby Isaac           }
3619371c9d4SSatish Balay           t[i2]     = s[0];
3629371c9d4SSatish Balay           t[i2 + 1] = s[1];
3639371c9d4SSatish Balay           t[i2 + 2] = s[2];
3649371c9d4SSatish Balay           t[i2 + 3] = s[3];
365e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
3669371c9d4SSatish Balay           x[i2]     = xw[0];
3679371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3689371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3699371c9d4SSatish Balay           x[i2 + 3] = xw[3];
370e48d15efSToby Isaac           idiag += 16;
371e48d15efSToby Isaac           i2 += 4;
372e48d15efSToby Isaac         }
373e48d15efSToby Isaac         break;
374e48d15efSToby Isaac       case 5:
375e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
3769371c9d4SSatish Balay         t[0] = b[0];
3779371c9d4SSatish Balay         t[1] = b[1];
3789371c9d4SSatish Balay         t[2] = b[2];
3799371c9d4SSatish Balay         t[3] = b[3];
3809371c9d4SSatish Balay         t[4] = b[4];
381e48d15efSToby Isaac         i2   = 5;
382e48d15efSToby Isaac         idiag += 25;
383e48d15efSToby Isaac         for (i = 1; i < m; i++) {
384e48d15efSToby Isaac           v    = aa + 25 * ai[i];
385e48d15efSToby Isaac           vi   = aj + ai[i];
386e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3879371c9d4SSatish Balay           s[0] = b[i2];
3889371c9d4SSatish Balay           s[1] = b[i2 + 1];
3899371c9d4SSatish Balay           s[2] = b[i2 + 2];
3909371c9d4SSatish Balay           s[3] = b[i2 + 3];
3919371c9d4SSatish Balay           s[4] = b[i2 + 4];
392e48d15efSToby Isaac           while (nz--) {
393e48d15efSToby Isaac             idx   = 5 * (*vi++);
3949371c9d4SSatish Balay             xw[0] = x[idx];
3959371c9d4SSatish Balay             xw[1] = x[1 + idx];
3969371c9d4SSatish Balay             xw[2] = x[2 + idx];
3979371c9d4SSatish Balay             xw[3] = x[3 + idx];
3989371c9d4SSatish Balay             xw[4] = x[4 + idx];
399e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400e48d15efSToby Isaac             v += 25;
401e48d15efSToby Isaac           }
4029371c9d4SSatish Balay           t[i2]     = s[0];
4039371c9d4SSatish Balay           t[i2 + 1] = s[1];
4049371c9d4SSatish Balay           t[i2 + 2] = s[2];
4059371c9d4SSatish Balay           t[i2 + 3] = s[3];
4069371c9d4SSatish Balay           t[i2 + 4] = s[4];
407e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
4089371c9d4SSatish Balay           x[i2]     = xw[0];
4099371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4109371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4119371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4129371c9d4SSatish Balay           x[i2 + 4] = xw[4];
413e48d15efSToby Isaac           idiag += 25;
414e48d15efSToby Isaac           i2 += 5;
415e48d15efSToby Isaac         }
416e48d15efSToby Isaac         break;
417e48d15efSToby Isaac       case 6:
418e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
4199371c9d4SSatish Balay         t[0] = b[0];
4209371c9d4SSatish Balay         t[1] = b[1];
4219371c9d4SSatish Balay         t[2] = b[2];
4229371c9d4SSatish Balay         t[3] = b[3];
4239371c9d4SSatish Balay         t[4] = b[4];
4249371c9d4SSatish Balay         t[5] = b[5];
425e48d15efSToby Isaac         i2   = 6;
426e48d15efSToby Isaac         idiag += 36;
427e48d15efSToby Isaac         for (i = 1; i < m; i++) {
428e48d15efSToby Isaac           v    = aa + 36 * ai[i];
429e48d15efSToby Isaac           vi   = aj + ai[i];
430e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4319371c9d4SSatish Balay           s[0] = b[i2];
4329371c9d4SSatish Balay           s[1] = b[i2 + 1];
4339371c9d4SSatish Balay           s[2] = b[i2 + 2];
4349371c9d4SSatish Balay           s[3] = b[i2 + 3];
4359371c9d4SSatish Balay           s[4] = b[i2 + 4];
4369371c9d4SSatish Balay           s[5] = b[i2 + 5];
437e48d15efSToby Isaac           while (nz--) {
438e48d15efSToby Isaac             idx   = 6 * (*vi++);
4399371c9d4SSatish Balay             xw[0] = x[idx];
4409371c9d4SSatish Balay             xw[1] = x[1 + idx];
4419371c9d4SSatish Balay             xw[2] = x[2 + idx];
4429371c9d4SSatish Balay             xw[3] = x[3 + idx];
4439371c9d4SSatish Balay             xw[4] = x[4 + idx];
4449371c9d4SSatish Balay             xw[5] = x[5 + idx];
445e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446e48d15efSToby Isaac             v += 36;
447e48d15efSToby Isaac           }
4489371c9d4SSatish Balay           t[i2]     = s[0];
4499371c9d4SSatish Balay           t[i2 + 1] = s[1];
4509371c9d4SSatish Balay           t[i2 + 2] = s[2];
4519371c9d4SSatish Balay           t[i2 + 3] = s[3];
4529371c9d4SSatish Balay           t[i2 + 4] = s[4];
4539371c9d4SSatish Balay           t[i2 + 5] = s[5];
454e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
4559371c9d4SSatish Balay           x[i2]     = xw[0];
4569371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4579371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4589371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4599371c9d4SSatish Balay           x[i2 + 4] = xw[4];
4609371c9d4SSatish Balay           x[i2 + 5] = xw[5];
461e48d15efSToby Isaac           idiag += 36;
462e48d15efSToby Isaac           i2 += 6;
463e48d15efSToby Isaac         }
464e48d15efSToby Isaac         break;
465e48d15efSToby Isaac       case 7:
466e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
4679371c9d4SSatish Balay         t[0] = b[0];
4689371c9d4SSatish Balay         t[1] = b[1];
4699371c9d4SSatish Balay         t[2] = b[2];
4709371c9d4SSatish Balay         t[3] = b[3];
4719371c9d4SSatish Balay         t[4] = b[4];
4729371c9d4SSatish Balay         t[5] = b[5];
4739371c9d4SSatish Balay         t[6] = b[6];
474e48d15efSToby Isaac         i2   = 7;
475e48d15efSToby Isaac         idiag += 49;
476e48d15efSToby Isaac         for (i = 1; i < m; i++) {
477e48d15efSToby Isaac           v    = aa + 49 * ai[i];
478e48d15efSToby Isaac           vi   = aj + ai[i];
479e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4809371c9d4SSatish Balay           s[0] = b[i2];
4819371c9d4SSatish Balay           s[1] = b[i2 + 1];
4829371c9d4SSatish Balay           s[2] = b[i2 + 2];
4839371c9d4SSatish Balay           s[3] = b[i2 + 3];
4849371c9d4SSatish Balay           s[4] = b[i2 + 4];
4859371c9d4SSatish Balay           s[5] = b[i2 + 5];
4869371c9d4SSatish Balay           s[6] = b[i2 + 6];
487e48d15efSToby Isaac           while (nz--) {
488e48d15efSToby Isaac             idx   = 7 * (*vi++);
4899371c9d4SSatish Balay             xw[0] = x[idx];
4909371c9d4SSatish Balay             xw[1] = x[1 + idx];
4919371c9d4SSatish Balay             xw[2] = x[2 + idx];
4929371c9d4SSatish Balay             xw[3] = x[3 + idx];
4939371c9d4SSatish Balay             xw[4] = x[4 + idx];
4949371c9d4SSatish Balay             xw[5] = x[5 + idx];
4959371c9d4SSatish Balay             xw[6] = x[6 + idx];
496e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497e48d15efSToby Isaac             v += 49;
498e48d15efSToby Isaac           }
4999371c9d4SSatish Balay           t[i2]     = s[0];
5009371c9d4SSatish Balay           t[i2 + 1] = s[1];
5019371c9d4SSatish Balay           t[i2 + 2] = s[2];
5029371c9d4SSatish Balay           t[i2 + 3] = s[3];
5039371c9d4SSatish Balay           t[i2 + 4] = s[4];
5049371c9d4SSatish Balay           t[i2 + 5] = s[5];
5059371c9d4SSatish Balay           t[i2 + 6] = s[6];
506e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
5079371c9d4SSatish Balay           x[i2]     = xw[0];
5089371c9d4SSatish Balay           x[i2 + 1] = xw[1];
5099371c9d4SSatish Balay           x[i2 + 2] = xw[2];
5109371c9d4SSatish Balay           x[i2 + 3] = xw[3];
5119371c9d4SSatish Balay           x[i2 + 4] = xw[4];
5129371c9d4SSatish Balay           x[i2 + 5] = xw[5];
5139371c9d4SSatish Balay           x[i2 + 6] = xw[6];
514e48d15efSToby Isaac           idiag += 49;
515e48d15efSToby Isaac           i2 += 7;
516e48d15efSToby Isaac         }
517e48d15efSToby Isaac         break;
518e48d15efSToby Isaac       default:
51996b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
5209566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(t, b, bs));
521de80f912SBarry Smith         i2 = bs;
522de80f912SBarry Smith         idiag += bs2;
523de80f912SBarry Smith         for (i = 1; i < m; i++) {
524de80f912SBarry Smith           v  = aa + bs2 * ai[i];
525de80f912SBarry Smith           vi = aj + ai[i];
526de80f912SBarry Smith           nz = diag[i] - ai[i];
527de80f912SBarry Smith 
5289566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
529de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
530de80f912SBarry Smith           workt = work;
531de80f912SBarry Smith           for (j = 0; j < nz; j++) {
5329566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533de80f912SBarry Smith             workt += bs;
534de80f912SBarry Smith           }
53596b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
5369566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(t + i2, w, bs));
53796b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
538de80f912SBarry Smith 
539de80f912SBarry Smith           idiag += bs2;
540de80f912SBarry Smith           i2 += bs;
541de80f912SBarry Smith         }
542e48d15efSToby Isaac         break;
543e48d15efSToby Isaac       }
544de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
5459566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546e48d15efSToby Isaac       xb = t;
5479371c9d4SSatish Balay     } else xb = b;
548de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
550e48d15efSToby Isaac       i2    = bs * (m - 1);
551e48d15efSToby Isaac       switch (bs) {
552e48d15efSToby Isaac       case 1:
553e48d15efSToby Isaac         s[0] = xb[i2];
554e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555e48d15efSToby Isaac         x[i2] = xw[0];
556e48d15efSToby Isaac         i2 -= 1;
557e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
558e48d15efSToby Isaac           v    = aa + (diag[i] + 1);
559e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
560e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
561e48d15efSToby Isaac           s[0] = xb[i2];
562e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
563e48d15efSToby Isaac             xw[0] = x[vi[j]];
564e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565e48d15efSToby Isaac           }
566e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567e48d15efSToby Isaac           x[i2] = xw[0];
568e48d15efSToby Isaac           idiag -= 1;
569e48d15efSToby Isaac           i2 -= 1;
570e48d15efSToby Isaac         }
571e48d15efSToby Isaac         break;
572e48d15efSToby Isaac       case 2:
5739371c9d4SSatish Balay         s[0] = xb[i2];
5749371c9d4SSatish Balay         s[1] = xb[i2 + 1];
575e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5769371c9d4SSatish Balay         x[i2]     = xw[0];
5779371c9d4SSatish Balay         x[i2 + 1] = xw[1];
578e48d15efSToby Isaac         i2 -= 2;
579e48d15efSToby Isaac         idiag -= 4;
580e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
581e48d15efSToby Isaac           v    = aa + 4 * (diag[i] + 1);
582e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
583e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
5849371c9d4SSatish Balay           s[0] = xb[i2];
5859371c9d4SSatish Balay           s[1] = xb[i2 + 1];
586e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
587e48d15efSToby Isaac             idx   = 2 * vi[j];
588e48d15efSToby Isaac             it    = 4 * j;
5899371c9d4SSatish Balay             xw[0] = x[idx];
5909371c9d4SSatish Balay             xw[1] = x[1 + idx];
591e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592e48d15efSToby Isaac           }
593e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5949371c9d4SSatish Balay           x[i2]     = xw[0];
5959371c9d4SSatish Balay           x[i2 + 1] = xw[1];
596e48d15efSToby Isaac           idiag -= 4;
597e48d15efSToby Isaac           i2 -= 2;
598e48d15efSToby Isaac         }
599e48d15efSToby Isaac         break;
600e48d15efSToby Isaac       case 3:
6019371c9d4SSatish Balay         s[0] = xb[i2];
6029371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6039371c9d4SSatish Balay         s[2] = xb[i2 + 2];
604e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6059371c9d4SSatish Balay         x[i2]     = xw[0];
6069371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6079371c9d4SSatish Balay         x[i2 + 2] = xw[2];
608e48d15efSToby Isaac         i2 -= 3;
609e48d15efSToby Isaac         idiag -= 9;
610e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
611e48d15efSToby Isaac           v    = aa + 9 * (diag[i] + 1);
612e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
613e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6149371c9d4SSatish Balay           s[0] = xb[i2];
6159371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6169371c9d4SSatish Balay           s[2] = xb[i2 + 2];
617e48d15efSToby Isaac           while (nz--) {
618e48d15efSToby Isaac             idx   = 3 * (*vi++);
6199371c9d4SSatish Balay             xw[0] = x[idx];
6209371c9d4SSatish Balay             xw[1] = x[1 + idx];
6219371c9d4SSatish Balay             xw[2] = x[2 + idx];
622e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623e48d15efSToby Isaac             v += 9;
624e48d15efSToby Isaac           }
625e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6269371c9d4SSatish Balay           x[i2]     = xw[0];
6279371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6289371c9d4SSatish Balay           x[i2 + 2] = xw[2];
629e48d15efSToby Isaac           idiag -= 9;
630e48d15efSToby Isaac           i2 -= 3;
631e48d15efSToby Isaac         }
632e48d15efSToby Isaac         break;
633e48d15efSToby Isaac       case 4:
6349371c9d4SSatish Balay         s[0] = xb[i2];
6359371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6369371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6379371c9d4SSatish Balay         s[3] = xb[i2 + 3];
638e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6399371c9d4SSatish Balay         x[i2]     = xw[0];
6409371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6419371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6429371c9d4SSatish Balay         x[i2 + 3] = xw[3];
643e48d15efSToby Isaac         i2 -= 4;
644e48d15efSToby Isaac         idiag -= 16;
645e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
646e48d15efSToby Isaac           v    = aa + 16 * (diag[i] + 1);
647e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
648e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6499371c9d4SSatish Balay           s[0] = xb[i2];
6509371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6519371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6529371c9d4SSatish Balay           s[3] = xb[i2 + 3];
653e48d15efSToby Isaac           while (nz--) {
654e48d15efSToby Isaac             idx   = 4 * (*vi++);
6559371c9d4SSatish Balay             xw[0] = x[idx];
6569371c9d4SSatish Balay             xw[1] = x[1 + idx];
6579371c9d4SSatish Balay             xw[2] = x[2 + idx];
6589371c9d4SSatish Balay             xw[3] = x[3 + idx];
659e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660e48d15efSToby Isaac             v += 16;
661e48d15efSToby Isaac           }
662e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6639371c9d4SSatish Balay           x[i2]     = xw[0];
6649371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6659371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6669371c9d4SSatish Balay           x[i2 + 3] = xw[3];
667e48d15efSToby Isaac           idiag -= 16;
668e48d15efSToby Isaac           i2 -= 4;
669e48d15efSToby Isaac         }
670e48d15efSToby Isaac         break;
671e48d15efSToby Isaac       case 5:
6729371c9d4SSatish Balay         s[0] = xb[i2];
6739371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6749371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6759371c9d4SSatish Balay         s[3] = xb[i2 + 3];
6769371c9d4SSatish Balay         s[4] = xb[i2 + 4];
677e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6789371c9d4SSatish Balay         x[i2]     = xw[0];
6799371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6809371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6819371c9d4SSatish Balay         x[i2 + 3] = xw[3];
6829371c9d4SSatish Balay         x[i2 + 4] = xw[4];
683e48d15efSToby Isaac         i2 -= 5;
684e48d15efSToby Isaac         idiag -= 25;
685e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
686e48d15efSToby Isaac           v    = aa + 25 * (diag[i] + 1);
687e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
688e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6899371c9d4SSatish Balay           s[0] = xb[i2];
6909371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6919371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6929371c9d4SSatish Balay           s[3] = xb[i2 + 3];
6939371c9d4SSatish Balay           s[4] = xb[i2 + 4];
694e48d15efSToby Isaac           while (nz--) {
695e48d15efSToby Isaac             idx   = 5 * (*vi++);
6969371c9d4SSatish Balay             xw[0] = x[idx];
6979371c9d4SSatish Balay             xw[1] = x[1 + idx];
6989371c9d4SSatish Balay             xw[2] = x[2 + idx];
6999371c9d4SSatish Balay             xw[3] = x[3 + idx];
7009371c9d4SSatish Balay             xw[4] = x[4 + idx];
701e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702e48d15efSToby Isaac             v += 25;
703e48d15efSToby Isaac           }
704e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
7059371c9d4SSatish Balay           x[i2]     = xw[0];
7069371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7079371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7089371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7099371c9d4SSatish Balay           x[i2 + 4] = xw[4];
710e48d15efSToby Isaac           idiag -= 25;
711e48d15efSToby Isaac           i2 -= 5;
712e48d15efSToby Isaac         }
713e48d15efSToby Isaac         break;
714e48d15efSToby Isaac       case 6:
7159371c9d4SSatish Balay         s[0] = xb[i2];
7169371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7179371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7189371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7199371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7209371c9d4SSatish Balay         s[5] = xb[i2 + 5];
721e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7229371c9d4SSatish Balay         x[i2]     = xw[0];
7239371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7249371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7259371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7269371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7279371c9d4SSatish Balay         x[i2 + 5] = xw[5];
728e48d15efSToby Isaac         i2 -= 6;
729e48d15efSToby Isaac         idiag -= 36;
730e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
731e48d15efSToby Isaac           v    = aa + 36 * (diag[i] + 1);
732e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
733e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7349371c9d4SSatish Balay           s[0] = xb[i2];
7359371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7369371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7379371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7389371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7399371c9d4SSatish Balay           s[5] = xb[i2 + 5];
740e48d15efSToby Isaac           while (nz--) {
741e48d15efSToby Isaac             idx   = 6 * (*vi++);
7429371c9d4SSatish Balay             xw[0] = x[idx];
7439371c9d4SSatish Balay             xw[1] = x[1 + idx];
7449371c9d4SSatish Balay             xw[2] = x[2 + idx];
7459371c9d4SSatish Balay             xw[3] = x[3 + idx];
7469371c9d4SSatish Balay             xw[4] = x[4 + idx];
7479371c9d4SSatish Balay             xw[5] = x[5 + idx];
748e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749e48d15efSToby Isaac             v += 36;
750e48d15efSToby Isaac           }
751e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7529371c9d4SSatish Balay           x[i2]     = xw[0];
7539371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7549371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7559371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7569371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7579371c9d4SSatish Balay           x[i2 + 5] = xw[5];
758e48d15efSToby Isaac           idiag -= 36;
759e48d15efSToby Isaac           i2 -= 6;
760e48d15efSToby Isaac         }
761e48d15efSToby Isaac         break;
762e48d15efSToby Isaac       case 7:
7639371c9d4SSatish Balay         s[0] = xb[i2];
7649371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7659371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7669371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7679371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7689371c9d4SSatish Balay         s[5] = xb[i2 + 5];
7699371c9d4SSatish Balay         s[6] = xb[i2 + 6];
770e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
7719371c9d4SSatish Balay         x[i2]     = xw[0];
7729371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7739371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7749371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7759371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7769371c9d4SSatish Balay         x[i2 + 5] = xw[5];
7779371c9d4SSatish Balay         x[i2 + 6] = xw[6];
778e48d15efSToby Isaac         i2 -= 7;
779e48d15efSToby Isaac         idiag -= 49;
780e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
781e48d15efSToby Isaac           v    = aa + 49 * (diag[i] + 1);
782e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
783e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7849371c9d4SSatish Balay           s[0] = xb[i2];
7859371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7869371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7879371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7889371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7899371c9d4SSatish Balay           s[5] = xb[i2 + 5];
7909371c9d4SSatish Balay           s[6] = xb[i2 + 6];
791e48d15efSToby Isaac           while (nz--) {
792e48d15efSToby Isaac             idx   = 7 * (*vi++);
7939371c9d4SSatish Balay             xw[0] = x[idx];
7949371c9d4SSatish Balay             xw[1] = x[1 + idx];
7959371c9d4SSatish Balay             xw[2] = x[2 + idx];
7969371c9d4SSatish Balay             xw[3] = x[3 + idx];
7979371c9d4SSatish Balay             xw[4] = x[4 + idx];
7989371c9d4SSatish Balay             xw[5] = x[5 + idx];
7999371c9d4SSatish Balay             xw[6] = x[6 + idx];
800e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801e48d15efSToby Isaac             v += 49;
802e48d15efSToby Isaac           }
803e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
8049371c9d4SSatish Balay           x[i2]     = xw[0];
8059371c9d4SSatish Balay           x[i2 + 1] = xw[1];
8069371c9d4SSatish Balay           x[i2 + 2] = xw[2];
8079371c9d4SSatish Balay           x[i2 + 3] = xw[3];
8089371c9d4SSatish Balay           x[i2 + 4] = xw[4];
8099371c9d4SSatish Balay           x[i2 + 5] = xw[5];
8109371c9d4SSatish Balay           x[i2 + 6] = xw[6];
811e48d15efSToby Isaac           idiag -= 49;
812e48d15efSToby Isaac           i2 -= 7;
813e48d15efSToby Isaac         }
814e48d15efSToby Isaac         break;
815e48d15efSToby Isaac       default:
8169566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(w, xb + i2, bs));
81796b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818de80f912SBarry Smith         i2 -= bs;
819e48d15efSToby Isaac         idiag -= bs2;
820de80f912SBarry Smith         for (i = m - 2; i >= 0; i--) {
821de80f912SBarry Smith           v  = aa + bs2 * (diag[i] + 1);
822de80f912SBarry Smith           vi = aj + diag[i] + 1;
823de80f912SBarry Smith           nz = ai[i + 1] - diag[i] - 1;
824de80f912SBarry Smith 
8259566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, xb + i2, bs));
826de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
827de80f912SBarry Smith           workt = work;
828de80f912SBarry Smith           for (j = 0; j < nz; j++) {
8299566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830de80f912SBarry Smith             workt += bs;
831de80f912SBarry Smith           }
83296b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
83396b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
834e48d15efSToby Isaac 
835de80f912SBarry Smith           idiag -= bs2;
836de80f912SBarry Smith           i2 -= bs;
837de80f912SBarry Smith         }
838e48d15efSToby Isaac         break;
839e48d15efSToby Isaac       }
8409566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841de80f912SBarry Smith     }
842e48d15efSToby Isaac     its--;
843e48d15efSToby Isaac   }
844e48d15efSToby Isaac   while (its--) {
845e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846e48d15efSToby Isaac       idiag = a->idiag;
847e48d15efSToby Isaac       i2    = 0;
848e48d15efSToby Isaac       switch (bs) {
849e48d15efSToby Isaac       case 1:
850e48d15efSToby Isaac         for (i = 0; i < m; i++) {
851e48d15efSToby Isaac           v    = aa + ai[i];
852e48d15efSToby Isaac           vi   = aj + ai[i];
853e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
854e48d15efSToby Isaac           s[0] = b[i2];
855e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
856e48d15efSToby Isaac             xw[0] = x[vi[j]];
857e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858e48d15efSToby Isaac           }
859e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860e48d15efSToby Isaac           x[i2] += xw[0];
861e48d15efSToby Isaac           idiag += 1;
862e48d15efSToby Isaac           i2 += 1;
863e48d15efSToby Isaac         }
864e48d15efSToby Isaac         break;
865e48d15efSToby Isaac       case 2:
866e48d15efSToby Isaac         for (i = 0; i < m; i++) {
867e48d15efSToby Isaac           v    = aa + 4 * ai[i];
868e48d15efSToby Isaac           vi   = aj + ai[i];
869e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8709371c9d4SSatish Balay           s[0] = b[i2];
8719371c9d4SSatish Balay           s[1] = b[i2 + 1];
872e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
873e48d15efSToby Isaac             idx   = 2 * vi[j];
874e48d15efSToby Isaac             it    = 4 * j;
8759371c9d4SSatish Balay             xw[0] = x[idx];
8769371c9d4SSatish Balay             xw[1] = x[1 + idx];
877e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878e48d15efSToby Isaac           }
879e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
8809371c9d4SSatish Balay           x[i2] += xw[0];
8819371c9d4SSatish Balay           x[i2 + 1] += xw[1];
882e48d15efSToby Isaac           idiag += 4;
883e48d15efSToby Isaac           i2 += 2;
884e48d15efSToby Isaac         }
885e48d15efSToby Isaac         break;
886e48d15efSToby Isaac       case 3:
887e48d15efSToby Isaac         for (i = 0; i < m; i++) {
888e48d15efSToby Isaac           v    = aa + 9 * ai[i];
889e48d15efSToby Isaac           vi   = aj + ai[i];
890e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8919371c9d4SSatish Balay           s[0] = b[i2];
8929371c9d4SSatish Balay           s[1] = b[i2 + 1];
8939371c9d4SSatish Balay           s[2] = b[i2 + 2];
894e48d15efSToby Isaac           while (nz--) {
895e48d15efSToby Isaac             idx   = 3 * (*vi++);
8969371c9d4SSatish Balay             xw[0] = x[idx];
8979371c9d4SSatish Balay             xw[1] = x[1 + idx];
8989371c9d4SSatish Balay             xw[2] = x[2 + idx];
899e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900e48d15efSToby Isaac             v += 9;
901e48d15efSToby Isaac           }
902e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
9039371c9d4SSatish Balay           x[i2] += xw[0];
9049371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9059371c9d4SSatish Balay           x[i2 + 2] += xw[2];
906e48d15efSToby Isaac           idiag += 9;
907e48d15efSToby Isaac           i2 += 3;
908e48d15efSToby Isaac         }
909e48d15efSToby Isaac         break;
910e48d15efSToby Isaac       case 4:
911e48d15efSToby Isaac         for (i = 0; i < m; i++) {
912e48d15efSToby Isaac           v    = aa + 16 * ai[i];
913e48d15efSToby Isaac           vi   = aj + ai[i];
914e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9159371c9d4SSatish Balay           s[0] = b[i2];
9169371c9d4SSatish Balay           s[1] = b[i2 + 1];
9179371c9d4SSatish Balay           s[2] = b[i2 + 2];
9189371c9d4SSatish Balay           s[3] = b[i2 + 3];
919e48d15efSToby Isaac           while (nz--) {
920e48d15efSToby Isaac             idx   = 4 * (*vi++);
9219371c9d4SSatish Balay             xw[0] = x[idx];
9229371c9d4SSatish Balay             xw[1] = x[1 + idx];
9239371c9d4SSatish Balay             xw[2] = x[2 + idx];
9249371c9d4SSatish Balay             xw[3] = x[3 + idx];
925e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926e48d15efSToby Isaac             v += 16;
927e48d15efSToby Isaac           }
928e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
9299371c9d4SSatish Balay           x[i2] += xw[0];
9309371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9319371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9329371c9d4SSatish Balay           x[i2 + 3] += xw[3];
933e48d15efSToby Isaac           idiag += 16;
934e48d15efSToby Isaac           i2 += 4;
935e48d15efSToby Isaac         }
936e48d15efSToby Isaac         break;
937e48d15efSToby Isaac       case 5:
938e48d15efSToby Isaac         for (i = 0; i < m; i++) {
939e48d15efSToby Isaac           v    = aa + 25 * ai[i];
940e48d15efSToby Isaac           vi   = aj + ai[i];
941e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9429371c9d4SSatish Balay           s[0] = b[i2];
9439371c9d4SSatish Balay           s[1] = b[i2 + 1];
9449371c9d4SSatish Balay           s[2] = b[i2 + 2];
9459371c9d4SSatish Balay           s[3] = b[i2 + 3];
9469371c9d4SSatish Balay           s[4] = b[i2 + 4];
947e48d15efSToby Isaac           while (nz--) {
948e48d15efSToby Isaac             idx   = 5 * (*vi++);
9499371c9d4SSatish Balay             xw[0] = x[idx];
9509371c9d4SSatish Balay             xw[1] = x[1 + idx];
9519371c9d4SSatish Balay             xw[2] = x[2 + idx];
9529371c9d4SSatish Balay             xw[3] = x[3 + idx];
9539371c9d4SSatish Balay             xw[4] = x[4 + idx];
954e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955e48d15efSToby Isaac             v += 25;
956e48d15efSToby Isaac           }
957e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
9589371c9d4SSatish Balay           x[i2] += xw[0];
9599371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9609371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9619371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9629371c9d4SSatish Balay           x[i2 + 4] += xw[4];
963e48d15efSToby Isaac           idiag += 25;
964e48d15efSToby Isaac           i2 += 5;
965e48d15efSToby Isaac         }
966e48d15efSToby Isaac         break;
967e48d15efSToby Isaac       case 6:
968e48d15efSToby Isaac         for (i = 0; i < m; i++) {
969e48d15efSToby Isaac           v    = aa + 36 * ai[i];
970e48d15efSToby Isaac           vi   = aj + ai[i];
971e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9729371c9d4SSatish Balay           s[0] = b[i2];
9739371c9d4SSatish Balay           s[1] = b[i2 + 1];
9749371c9d4SSatish Balay           s[2] = b[i2 + 2];
9759371c9d4SSatish Balay           s[3] = b[i2 + 3];
9769371c9d4SSatish Balay           s[4] = b[i2 + 4];
9779371c9d4SSatish Balay           s[5] = b[i2 + 5];
978e48d15efSToby Isaac           while (nz--) {
979e48d15efSToby Isaac             idx   = 6 * (*vi++);
9809371c9d4SSatish Balay             xw[0] = x[idx];
9819371c9d4SSatish Balay             xw[1] = x[1 + idx];
9829371c9d4SSatish Balay             xw[2] = x[2 + idx];
9839371c9d4SSatish Balay             xw[3] = x[3 + idx];
9849371c9d4SSatish Balay             xw[4] = x[4 + idx];
9859371c9d4SSatish Balay             xw[5] = x[5 + idx];
986e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987e48d15efSToby Isaac             v += 36;
988e48d15efSToby Isaac           }
989e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
9909371c9d4SSatish Balay           x[i2] += xw[0];
9919371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9929371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9939371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9949371c9d4SSatish Balay           x[i2 + 4] += xw[4];
9959371c9d4SSatish Balay           x[i2 + 5] += xw[5];
996e48d15efSToby Isaac           idiag += 36;
997e48d15efSToby Isaac           i2 += 6;
998e48d15efSToby Isaac         }
999e48d15efSToby Isaac         break;
1000e48d15efSToby Isaac       case 7:
1001e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1002e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1003e48d15efSToby Isaac           vi   = aj + ai[i];
1004e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10059371c9d4SSatish Balay           s[0] = b[i2];
10069371c9d4SSatish Balay           s[1] = b[i2 + 1];
10079371c9d4SSatish Balay           s[2] = b[i2 + 2];
10089371c9d4SSatish Balay           s[3] = b[i2 + 3];
10099371c9d4SSatish Balay           s[4] = b[i2 + 4];
10109371c9d4SSatish Balay           s[5] = b[i2 + 5];
10119371c9d4SSatish Balay           s[6] = b[i2 + 6];
1012e48d15efSToby Isaac           while (nz--) {
1013e48d15efSToby Isaac             idx   = 7 * (*vi++);
10149371c9d4SSatish Balay             xw[0] = x[idx];
10159371c9d4SSatish Balay             xw[1] = x[1 + idx];
10169371c9d4SSatish Balay             xw[2] = x[2 + idx];
10179371c9d4SSatish Balay             xw[3] = x[3 + idx];
10189371c9d4SSatish Balay             xw[4] = x[4 + idx];
10199371c9d4SSatish Balay             xw[5] = x[5 + idx];
10209371c9d4SSatish Balay             xw[6] = x[6 + idx];
1021e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022e48d15efSToby Isaac             v += 49;
1023e48d15efSToby Isaac           }
1024e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
10259371c9d4SSatish Balay           x[i2] += xw[0];
10269371c9d4SSatish Balay           x[i2 + 1] += xw[1];
10279371c9d4SSatish Balay           x[i2 + 2] += xw[2];
10289371c9d4SSatish Balay           x[i2 + 3] += xw[3];
10299371c9d4SSatish Balay           x[i2 + 4] += xw[4];
10309371c9d4SSatish Balay           x[i2 + 5] += xw[5];
10319371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1032e48d15efSToby Isaac           idiag += 49;
1033e48d15efSToby Isaac           i2 += 7;
1034e48d15efSToby Isaac         }
1035e48d15efSToby Isaac         break;
1036e48d15efSToby Isaac       default:
1037e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1038e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1039e48d15efSToby Isaac           vi = aj + ai[i];
1040e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1041e48d15efSToby Isaac 
10429566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1043e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1044e48d15efSToby Isaac           workt = work;
1045e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
10469566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047e48d15efSToby Isaac             workt += bs;
1048e48d15efSToby Isaac           }
1049e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1051e48d15efSToby Isaac 
1052e48d15efSToby Isaac           idiag += bs2;
1053e48d15efSToby Isaac           i2 += bs;
1054e48d15efSToby Isaac         }
1055e48d15efSToby Isaac         break;
1056e48d15efSToby Isaac       }
10579566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058e48d15efSToby Isaac     }
1059e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
1061e48d15efSToby Isaac       i2    = bs * (m - 1);
1062e48d15efSToby Isaac       switch (bs) {
1063e48d15efSToby Isaac       case 1:
1064e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1065e48d15efSToby Isaac           v    = aa + ai[i];
1066e48d15efSToby Isaac           vi   = aj + ai[i];
1067e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
1068e48d15efSToby Isaac           s[0] = b[i2];
1069e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1070e48d15efSToby Isaac             xw[0] = x[vi[j]];
1071e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072e48d15efSToby Isaac           }
1073e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074e48d15efSToby Isaac           x[i2] += xw[0];
1075e48d15efSToby Isaac           idiag -= 1;
1076e48d15efSToby Isaac           i2 -= 1;
1077e48d15efSToby Isaac         }
1078e48d15efSToby Isaac         break;
1079e48d15efSToby Isaac       case 2:
1080e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1081e48d15efSToby Isaac           v    = aa + 4 * ai[i];
1082e48d15efSToby Isaac           vi   = aj + ai[i];
1083e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10849371c9d4SSatish Balay           s[0] = b[i2];
10859371c9d4SSatish Balay           s[1] = b[i2 + 1];
1086e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1087e48d15efSToby Isaac             idx   = 2 * vi[j];
1088e48d15efSToby Isaac             it    = 4 * j;
10899371c9d4SSatish Balay             xw[0] = x[idx];
10909371c9d4SSatish Balay             xw[1] = x[1 + idx];
1091e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092e48d15efSToby Isaac           }
1093e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
10949371c9d4SSatish Balay           x[i2] += xw[0];
10959371c9d4SSatish Balay           x[i2 + 1] += xw[1];
1096e48d15efSToby Isaac           idiag -= 4;
1097e48d15efSToby Isaac           i2 -= 2;
1098e48d15efSToby Isaac         }
1099e48d15efSToby Isaac         break;
1100e48d15efSToby Isaac       case 3:
1101e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1102e48d15efSToby Isaac           v    = aa + 9 * ai[i];
1103e48d15efSToby Isaac           vi   = aj + ai[i];
1104e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11059371c9d4SSatish Balay           s[0] = b[i2];
11069371c9d4SSatish Balay           s[1] = b[i2 + 1];
11079371c9d4SSatish Balay           s[2] = b[i2 + 2];
1108e48d15efSToby Isaac           while (nz--) {
1109e48d15efSToby Isaac             idx   = 3 * (*vi++);
11109371c9d4SSatish Balay             xw[0] = x[idx];
11119371c9d4SSatish Balay             xw[1] = x[1 + idx];
11129371c9d4SSatish Balay             xw[2] = x[2 + idx];
1113e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114e48d15efSToby Isaac             v += 9;
1115e48d15efSToby Isaac           }
1116e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
11179371c9d4SSatish Balay           x[i2] += xw[0];
11189371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11199371c9d4SSatish Balay           x[i2 + 2] += xw[2];
1120e48d15efSToby Isaac           idiag -= 9;
1121e48d15efSToby Isaac           i2 -= 3;
1122e48d15efSToby Isaac         }
1123e48d15efSToby Isaac         break;
1124e48d15efSToby Isaac       case 4:
1125e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1126e48d15efSToby Isaac           v    = aa + 16 * ai[i];
1127e48d15efSToby Isaac           vi   = aj + ai[i];
1128e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11299371c9d4SSatish Balay           s[0] = b[i2];
11309371c9d4SSatish Balay           s[1] = b[i2 + 1];
11319371c9d4SSatish Balay           s[2] = b[i2 + 2];
11329371c9d4SSatish Balay           s[3] = b[i2 + 3];
1133e48d15efSToby Isaac           while (nz--) {
1134e48d15efSToby Isaac             idx   = 4 * (*vi++);
11359371c9d4SSatish Balay             xw[0] = x[idx];
11369371c9d4SSatish Balay             xw[1] = x[1 + idx];
11379371c9d4SSatish Balay             xw[2] = x[2 + idx];
11389371c9d4SSatish Balay             xw[3] = x[3 + idx];
1139e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140e48d15efSToby Isaac             v += 16;
1141e48d15efSToby Isaac           }
1142e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
11439371c9d4SSatish Balay           x[i2] += xw[0];
11449371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11459371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11469371c9d4SSatish Balay           x[i2 + 3] += xw[3];
1147e48d15efSToby Isaac           idiag -= 16;
1148e48d15efSToby Isaac           i2 -= 4;
1149e48d15efSToby Isaac         }
1150e48d15efSToby Isaac         break;
1151e48d15efSToby Isaac       case 5:
1152e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1153e48d15efSToby Isaac           v    = aa + 25 * ai[i];
1154e48d15efSToby Isaac           vi   = aj + ai[i];
1155e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11569371c9d4SSatish Balay           s[0] = b[i2];
11579371c9d4SSatish Balay           s[1] = b[i2 + 1];
11589371c9d4SSatish Balay           s[2] = b[i2 + 2];
11599371c9d4SSatish Balay           s[3] = b[i2 + 3];
11609371c9d4SSatish Balay           s[4] = b[i2 + 4];
1161e48d15efSToby Isaac           while (nz--) {
1162e48d15efSToby Isaac             idx   = 5 * (*vi++);
11639371c9d4SSatish Balay             xw[0] = x[idx];
11649371c9d4SSatish Balay             xw[1] = x[1 + idx];
11659371c9d4SSatish Balay             xw[2] = x[2 + idx];
11669371c9d4SSatish Balay             xw[3] = x[3 + idx];
11679371c9d4SSatish Balay             xw[4] = x[4 + idx];
1168e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169e48d15efSToby Isaac             v += 25;
1170e48d15efSToby Isaac           }
1171e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
11729371c9d4SSatish Balay           x[i2] += xw[0];
11739371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11749371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11759371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11769371c9d4SSatish Balay           x[i2 + 4] += xw[4];
1177e48d15efSToby Isaac           idiag -= 25;
1178e48d15efSToby Isaac           i2 -= 5;
1179e48d15efSToby Isaac         }
1180e48d15efSToby Isaac         break;
1181e48d15efSToby Isaac       case 6:
1182e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1183e48d15efSToby Isaac           v    = aa + 36 * ai[i];
1184e48d15efSToby Isaac           vi   = aj + ai[i];
1185e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11869371c9d4SSatish Balay           s[0] = b[i2];
11879371c9d4SSatish Balay           s[1] = b[i2 + 1];
11889371c9d4SSatish Balay           s[2] = b[i2 + 2];
11899371c9d4SSatish Balay           s[3] = b[i2 + 3];
11909371c9d4SSatish Balay           s[4] = b[i2 + 4];
11919371c9d4SSatish Balay           s[5] = b[i2 + 5];
1192e48d15efSToby Isaac           while (nz--) {
1193e48d15efSToby Isaac             idx   = 6 * (*vi++);
11949371c9d4SSatish Balay             xw[0] = x[idx];
11959371c9d4SSatish Balay             xw[1] = x[1 + idx];
11969371c9d4SSatish Balay             xw[2] = x[2 + idx];
11979371c9d4SSatish Balay             xw[3] = x[3 + idx];
11989371c9d4SSatish Balay             xw[4] = x[4 + idx];
11999371c9d4SSatish Balay             xw[5] = x[5 + idx];
1200e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201e48d15efSToby Isaac             v += 36;
1202e48d15efSToby Isaac           }
1203e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
12049371c9d4SSatish Balay           x[i2] += xw[0];
12059371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12069371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12079371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12089371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12099371c9d4SSatish Balay           x[i2 + 5] += xw[5];
1210e48d15efSToby Isaac           idiag -= 36;
1211e48d15efSToby Isaac           i2 -= 6;
1212e48d15efSToby Isaac         }
1213e48d15efSToby Isaac         break;
1214e48d15efSToby Isaac       case 7:
1215e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1216e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1217e48d15efSToby Isaac           vi   = aj + ai[i];
1218e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
12199371c9d4SSatish Balay           s[0] = b[i2];
12209371c9d4SSatish Balay           s[1] = b[i2 + 1];
12219371c9d4SSatish Balay           s[2] = b[i2 + 2];
12229371c9d4SSatish Balay           s[3] = b[i2 + 3];
12239371c9d4SSatish Balay           s[4] = b[i2 + 4];
12249371c9d4SSatish Balay           s[5] = b[i2 + 5];
12259371c9d4SSatish Balay           s[6] = b[i2 + 6];
1226e48d15efSToby Isaac           while (nz--) {
1227e48d15efSToby Isaac             idx   = 7 * (*vi++);
12289371c9d4SSatish Balay             xw[0] = x[idx];
12299371c9d4SSatish Balay             xw[1] = x[1 + idx];
12309371c9d4SSatish Balay             xw[2] = x[2 + idx];
12319371c9d4SSatish Balay             xw[3] = x[3 + idx];
12329371c9d4SSatish Balay             xw[4] = x[4 + idx];
12339371c9d4SSatish Balay             xw[5] = x[5 + idx];
12349371c9d4SSatish Balay             xw[6] = x[6 + idx];
1235e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236e48d15efSToby Isaac             v += 49;
1237e48d15efSToby Isaac           }
1238e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
12399371c9d4SSatish Balay           x[i2] += xw[0];
12409371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12419371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12429371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12439371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12449371c9d4SSatish Balay           x[i2 + 5] += xw[5];
12459371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1246e48d15efSToby Isaac           idiag -= 49;
1247e48d15efSToby Isaac           i2 -= 7;
1248e48d15efSToby Isaac         }
1249e48d15efSToby Isaac         break;
1250e48d15efSToby Isaac       default:
1251e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1252e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1253e48d15efSToby Isaac           vi = aj + ai[i];
1254e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1255e48d15efSToby Isaac 
12569566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1257e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1258e48d15efSToby Isaac           workt = work;
1259e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
12609566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261e48d15efSToby Isaac             workt += bs;
1262e48d15efSToby Isaac           }
1263e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1265e48d15efSToby Isaac 
1266e48d15efSToby Isaac           idiag -= bs2;
1267e48d15efSToby Isaac           i2 -= bs;
1268e48d15efSToby Isaac         }
1269e48d15efSToby Isaac         break;
1270e48d15efSToby Isaac       }
12719566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272e48d15efSToby Isaac     }
1273e48d15efSToby Isaac   }
12749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1277de80f912SBarry Smith }
1278de80f912SBarry Smith 
1279af674e45SBarry Smith /*
128081824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281af674e45SBarry Smith */
1282af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283af674e45SBarry Smith   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285af674e45SBarry Smith   #define matsetvaluesblocked4_ matsetvaluesblocked4
1286af674e45SBarry Smith #endif
1287af674e45SBarry Smith 
1288d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289d71ae5a4SJacob Faibussowitsch {
1290af674e45SBarry Smith   Mat                A = *AA;
1291af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1292c1ac3661SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293c1ac3661SBarry Smith   PetscInt          *ai = a->i, *ailen = a->ilen;
129417ec6a02SBarry Smith   PetscInt          *aj = a->j, stepval, lastcol = -1;
1295f15d580aSBarry Smith   const PetscScalar *value = v;
12964bb09213Spetsc   MatScalar         *ap, *aa = a->a, *bap;
1297af674e45SBarry Smith 
1298af674e45SBarry Smith   PetscFunctionBegin;
1299ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300af674e45SBarry Smith   stepval = (n - 1) * 4;
1301af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
1302af674e45SBarry Smith     row  = im[k];
1303af674e45SBarry Smith     rp   = aj + ai[row];
1304af674e45SBarry Smith     ap   = aa + 16 * ai[row];
1305af674e45SBarry Smith     nrow = ailen[row];
1306af674e45SBarry Smith     low  = 0;
130717ec6a02SBarry Smith     high = nrow;
1308af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
1309af674e45SBarry Smith       col = in[l];
1310db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1311db4deed7SKarl Rupp       else high = nrow;
131217ec6a02SBarry Smith       lastcol = col;
13131e3347e8SBarry Smith       value   = v + k * (stepval + 4 + l) * 4;
1314af674e45SBarry Smith       while (high - low > 7) {
1315af674e45SBarry Smith         t = (low + high) / 2;
1316af674e45SBarry Smith         if (rp[t] > col) high = t;
1317af674e45SBarry Smith         else low = t;
1318af674e45SBarry Smith       }
1319af674e45SBarry Smith       for (i = low; i < high; i++) {
1320af674e45SBarry Smith         if (rp[i] > col) break;
1321af674e45SBarry Smith         if (rp[i] == col) {
1322af674e45SBarry Smith           bap = ap + 16 * i;
1323af674e45SBarry Smith           for (ii = 0; ii < 4; ii++, value += stepval) {
1324ad540459SPierre Jolivet             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325af674e45SBarry Smith           }
1326af674e45SBarry Smith           goto noinsert2;
1327af674e45SBarry Smith         }
1328af674e45SBarry Smith       }
1329af674e45SBarry Smith       N = nrow++ - 1;
133017ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
1331af674e45SBarry Smith       /* shift up all the later entries in this row */
1332af674e45SBarry Smith       for (ii = N; ii >= i; ii--) {
1333af674e45SBarry Smith         rp[ii + 1] = rp[ii];
13349566063dSJacob Faibussowitsch         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335af674e45SBarry Smith       }
133648a46eb9SPierre Jolivet       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337af674e45SBarry Smith       rp[i] = col;
1338af674e45SBarry Smith       bap   = ap + 16 * i;
1339af674e45SBarry Smith       for (ii = 0; ii < 4; ii++, value += stepval) {
1340ad540459SPierre Jolivet         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341af674e45SBarry Smith       }
1342af674e45SBarry Smith     noinsert2:;
1343af674e45SBarry Smith       low = i;
1344af674e45SBarry Smith     }
1345af674e45SBarry Smith     ailen[row] = nrow;
1346af674e45SBarry Smith   }
1347be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1348af674e45SBarry Smith }
1349af674e45SBarry Smith 
1350af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351af674e45SBarry Smith   #define matsetvalues4_ MATSETVALUES4
1352af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353af674e45SBarry Smith   #define matsetvalues4_ matsetvalues4
1354af674e45SBarry Smith #endif
1355af674e45SBarry Smith 
1356d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357d71ae5a4SJacob Faibussowitsch {
1358af674e45SBarry Smith   Mat          A = *AA;
1359af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360580bdb30SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
1362c1ac3661SBarry Smith   PetscInt    *aj = a->j, brow, bcol;
136317ec6a02SBarry Smith   PetscInt     ridx, cidx, lastcol = -1;
1364af674e45SBarry Smith   MatScalar   *ap, value, *aa      = a->a, *bap;
1365af674e45SBarry Smith 
1366af674e45SBarry Smith   PetscFunctionBegin;
1367af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
13689371c9d4SSatish Balay     row  = im[k];
13699371c9d4SSatish Balay     brow = row / 4;
1370af674e45SBarry Smith     rp   = aj + ai[brow];
1371af674e45SBarry Smith     ap   = aa + 16 * ai[brow];
1372af674e45SBarry Smith     nrow = ailen[brow];
1373af674e45SBarry Smith     low  = 0;
137417ec6a02SBarry Smith     high = nrow;
1375af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
13769371c9d4SSatish Balay       col   = in[l];
13779371c9d4SSatish Balay       bcol  = col / 4;
13789371c9d4SSatish Balay       ridx  = row % 4;
13799371c9d4SSatish Balay       cidx  = col % 4;
1380af674e45SBarry Smith       value = v[l + k * n];
1381db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1382db4deed7SKarl Rupp       else high = nrow;
138317ec6a02SBarry Smith       lastcol = col;
1384af674e45SBarry Smith       while (high - low > 7) {
1385af674e45SBarry Smith         t = (low + high) / 2;
1386af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1387af674e45SBarry Smith         else low = t;
1388af674e45SBarry Smith       }
1389af674e45SBarry Smith       for (i = low; i < high; i++) {
1390af674e45SBarry Smith         if (rp[i] > bcol) break;
1391af674e45SBarry Smith         if (rp[i] == bcol) {
1392af674e45SBarry Smith           bap = ap + 16 * i + 4 * cidx + ridx;
1393af674e45SBarry Smith           *bap += value;
1394af674e45SBarry Smith           goto noinsert1;
1395af674e45SBarry Smith         }
1396af674e45SBarry Smith       }
1397af674e45SBarry Smith       N = nrow++ - 1;
139817ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1399af674e45SBarry Smith       /* shift up all the later entries in this row */
14009566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
14019566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
14029566063dSJacob Faibussowitsch       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403af674e45SBarry Smith       rp[i]                        = bcol;
1404af674e45SBarry Smith       ap[16 * i + 4 * cidx + ridx] = value;
1405af674e45SBarry Smith     noinsert1:;
1406af674e45SBarry Smith       low = i;
1407af674e45SBarry Smith     }
1408af674e45SBarry Smith     ailen[brow] = nrow;
1409af674e45SBarry Smith   }
1410be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1411af674e45SBarry Smith }
1412af674e45SBarry Smith 
1413be5855fcSBarry Smith /*
1414be5855fcSBarry Smith      Checks for missing diagonals
1415be5855fcSBarry Smith */
1416d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1417d71ae5a4SJacob Faibussowitsch {
1418be5855fcSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14197734d3b5SMatthew G. Knepley   PetscInt    *diag, *ii = a->i, i;
1420be5855fcSBarry Smith 
1421be5855fcSBarry Smith   PetscFunctionBegin;
14229566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
14232af78befSBarry Smith   *missing = PETSC_FALSE;
14247734d3b5SMatthew G. Knepley   if (A->rmap->n > 0 && !ii) {
14252efa7f71SHong Zhang     *missing = PETSC_TRUE;
14262efa7f71SHong Zhang     if (d) *d = 0;
14279566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
14282efa7f71SHong Zhang   } else {
142901445905SHong Zhang     PetscInt n;
143001445905SHong Zhang     n    = PetscMin(a->mbs, a->nbs);
1431883fce79SBarry Smith     diag = a->diag;
143201445905SHong Zhang     for (i = 0; i < n; i++) {
14337734d3b5SMatthew G. Knepley       if (diag[i] >= ii[i + 1]) {
14342af78befSBarry Smith         *missing = PETSC_TRUE;
14352af78befSBarry Smith         if (d) *d = i;
14369566063dSJacob Faibussowitsch         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1437358d2f5dSShri Abhyankar         break;
14382efa7f71SHong Zhang       }
1439be5855fcSBarry Smith     }
1440be5855fcSBarry Smith   }
14413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1442be5855fcSBarry Smith }
1443be5855fcSBarry Smith 
1444d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1445d71ae5a4SJacob Faibussowitsch {
1446de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
144709f38230SBarry Smith   PetscInt     i, j, m = a->mbs;
1448de6a44a3SBarry Smith 
14493a40ed3dSBarry Smith   PetscFunctionBegin;
145009f38230SBarry Smith   if (!a->diag) {
14519566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(m, &a->diag));
14524fd072dbSBarry Smith     a->free_diag = PETSC_TRUE;
145309f38230SBarry Smith   }
14547fc0212eSBarry Smith   for (i = 0; i < m; i++) {
145509f38230SBarry Smith     a->diag[i] = a->i[i + 1];
1456de6a44a3SBarry Smith     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1457de6a44a3SBarry Smith       if (a->j[j] == i) {
145809f38230SBarry Smith         a->diag[i] = j;
1459de6a44a3SBarry Smith         break;
1460de6a44a3SBarry Smith       }
1461de6a44a3SBarry Smith     }
1462de6a44a3SBarry Smith   }
14633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1464de6a44a3SBarry Smith }
14652593348eSBarry Smith 
1466d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1467d71ae5a4SJacob Faibussowitsch {
14683b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14691a83f524SJed Brown   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
14701a83f524SJed Brown   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
14713b2fbd54SBarry Smith 
14723a40ed3dSBarry Smith   PetscFunctionBegin;
14733b2fbd54SBarry Smith   *nn = n;
14743ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
14753b2fbd54SBarry Smith   if (symmetric) {
14769566063dSJacob Faibussowitsch     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1477553b3c51SBarry Smith     nz = tia[n];
14783b2fbd54SBarry Smith   } else {
14799371c9d4SSatish Balay     tia = a->i;
14809371c9d4SSatish Balay     tja = a->j;
14813b2fbd54SBarry Smith   }
14823b2fbd54SBarry Smith 
1483ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1484ecc77c7aSBarry Smith     (*nn) *= bs;
14858f7157efSSatish Balay     /* malloc & create the natural set of indices */
14869566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((n + 1) * bs, ia));
14879985e31cSBarry Smith     if (n) {
14882462f5fdSStefano Zampini       (*ia)[0] = oshift;
1489ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
14909985e31cSBarry Smith     }
1491ecc77c7aSBarry Smith 
1492ecc77c7aSBarry Smith     for (i = 1; i < n; i++) {
1493ecc77c7aSBarry Smith       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1494ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
14958f7157efSSatish Balay     }
1496ad540459SPierre Jolivet     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1497ecc77c7aSBarry Smith 
14981a83f524SJed Brown     if (inja) {
14999566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz * bs * bs, ja));
15009985e31cSBarry Smith       cnt = 0;
15019985e31cSBarry Smith       for (i = 0; i < n; i++) {
15029985e31cSBarry Smith         for (j = 0; j < bs; j++) {
15039985e31cSBarry Smith           for (k = tia[i]; k < tia[i + 1]; k++) {
1504ad540459SPierre Jolivet             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
15059985e31cSBarry Smith           }
15069985e31cSBarry Smith         }
15079985e31cSBarry Smith       }
15089985e31cSBarry Smith     }
15099985e31cSBarry Smith 
15108f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
15119566063dSJacob Faibussowitsch       PetscCall(PetscFree(tia));
15129566063dSJacob Faibussowitsch       PetscCall(PetscFree(tja));
15138f7157efSSatish Balay     }
1514f6d58c54SBarry Smith   } else if (oshift == 1) {
1515715a17b5SBarry Smith     if (symmetric) {
1516a2ea699eSBarry Smith       nz = tia[A->rmap->n / bs];
1517715a17b5SBarry Smith       /*  add 1 to i and j indices */
1518715a17b5SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1519715a17b5SBarry Smith       *ia = tia;
1520715a17b5SBarry Smith       if (ja) {
1521715a17b5SBarry Smith         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1522715a17b5SBarry Smith         *ja = tja;
1523715a17b5SBarry Smith       }
1524715a17b5SBarry Smith     } else {
1525a2ea699eSBarry Smith       nz = a->i[A->rmap->n / bs];
1526f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
15279566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1528f6d58c54SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1529f6d58c54SBarry Smith       if (ja) {
15309566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(nz, ja));
1531f6d58c54SBarry Smith         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1532f6d58c54SBarry Smith       }
1533715a17b5SBarry Smith     }
15348f7157efSSatish Balay   } else {
15358f7157efSSatish Balay     *ia = tia;
1536ecc77c7aSBarry Smith     if (ja) *ja = tja;
15378f7157efSSatish Balay   }
15383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15393b2fbd54SBarry Smith }
15403b2fbd54SBarry Smith 
1541d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1542d71ae5a4SJacob Faibussowitsch {
15433a40ed3dSBarry Smith   PetscFunctionBegin;
15443ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1545715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
15469566063dSJacob Faibussowitsch     PetscCall(PetscFree(*ia));
15479566063dSJacob Faibussowitsch     if (ja) PetscCall(PetscFree(*ja));
15483b2fbd54SBarry Smith   }
15493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15503b2fbd54SBarry Smith }
15513b2fbd54SBarry Smith 
1552d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1553d71ae5a4SJacob Faibussowitsch {
15542d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15552d61bbb3SSatish Balay 
1556433994e6SBarry Smith   PetscFunctionBegin;
1557b4e2f619SBarry Smith   if (A->hash_active) {
1558b4e2f619SBarry Smith     PetscInt bs;
1559e3c72094SPierre Jolivet     A->ops[0] = a->cops;
1560b4e2f619SBarry Smith     PetscCall(PetscHMapIJVDestroy(&a->ht));
1561b4e2f619SBarry Smith     PetscCall(MatGetBlockSize(A, &bs));
1562b4e2f619SBarry Smith     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1563b4e2f619SBarry Smith     PetscCall(PetscFree(a->dnz));
1564b4e2f619SBarry Smith     PetscCall(PetscFree(a->bdnz));
1565b4e2f619SBarry Smith     A->hash_active = PETSC_FALSE;
1566b4e2f619SBarry Smith   }
15673ba16761SJacob Faibussowitsch   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
15689566063dSJacob Faibussowitsch   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
15699566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
15709566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
15719566063dSJacob Faibussowitsch   if (a->free_diag) PetscCall(PetscFree(a->diag));
15729566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->idiag));
15739566063dSJacob Faibussowitsch   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
15749566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->solve_work));
15759566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->mult_work));
15769566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_workt));
15779566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_work));
15789566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
15799566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->saved_values));
15809566063dSJacob Faibussowitsch   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1581c4319e64SHong Zhang 
15829566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->sbaijMat));
15839566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->parent));
15849566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1585901853e0SKris Buschelman 
15869566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
15879566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
15889566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
15899566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
15909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
15919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
15929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
15939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
15949566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
15959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
15969566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
15979566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
15987ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
15999566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
16007ea3e4caSstefano_zampini #endif
16019566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
16022e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
16033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16042d61bbb3SSatish Balay }
16052d61bbb3SSatish Balay 
160666976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1607d71ae5a4SJacob Faibussowitsch {
16082d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
16092d61bbb3SSatish Balay 
16102d61bbb3SSatish Balay   PetscFunctionBegin;
1611aa275fccSKris Buschelman   switch (op) {
1612d71ae5a4SJacob Faibussowitsch   case MAT_ROW_ORIENTED:
1613d71ae5a4SJacob Faibussowitsch     a->roworiented = flg;
1614d71ae5a4SJacob Faibussowitsch     break;
1615d71ae5a4SJacob Faibussowitsch   case MAT_KEEP_NONZERO_PATTERN:
1616d71ae5a4SJacob Faibussowitsch     a->keepnonzeropattern = flg;
1617d71ae5a4SJacob Faibussowitsch     break;
1618d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATIONS:
1619d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? 0 : 1);
1620d71ae5a4SJacob Faibussowitsch     break;
1621d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATION_ERR:
1622d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -1 : 0);
1623d71ae5a4SJacob Faibussowitsch     break;
1624d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1625d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -2 : 0);
1626d71ae5a4SJacob Faibussowitsch     break;
1627d71ae5a4SJacob Faibussowitsch   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1628d71ae5a4SJacob Faibussowitsch     a->nounused = (flg ? -1 : 0);
1629d71ae5a4SJacob Faibussowitsch     break;
16308c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1631aa275fccSKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
1632aa275fccSKris Buschelman   case MAT_USE_HASH_TABLE:
1633d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
1634d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1635d71ae5a4SJacob Faibussowitsch     break;
16365021d80fSJed Brown   case MAT_SPD:
163777e54ba9SKris Buschelman   case MAT_SYMMETRIC:
163877e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
16399a4540c5SBarry Smith   case MAT_HERMITIAN:
16409a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1641b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1642c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1643672ba085SHong Zhang   case MAT_STRUCTURE_ONLY:
1644b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
1645b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
164677e54ba9SKris Buschelman     break;
1647d71ae5a4SJacob Faibussowitsch   default:
1648d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
16492d61bbb3SSatish Balay   }
16503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16512d61bbb3SSatish Balay }
16522d61bbb3SSatish Balay 
165352768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
1654d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1655d71ae5a4SJacob Faibussowitsch {
165652768537SHong Zhang   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
165752768537SHong Zhang   MatScalar   *aa_i;
165887828ca2SBarry Smith   PetscScalar *v_i;
16592d61bbb3SSatish Balay 
16602d61bbb3SSatish Balay   PetscFunctionBegin;
1661d0f46423SBarry Smith   bs  = A->rmap->bs;
166252768537SHong Zhang   bs2 = bs * bs;
16635f80ce2aSJacob Faibussowitsch   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
16642d61bbb3SSatish Balay 
16652d61bbb3SSatish Balay   bn  = row / bs; /* Block number */
16662d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
16672d61bbb3SSatish Balay   M   = ai[bn + 1] - ai[bn];
16682d61bbb3SSatish Balay   *nz = bs * M;
16692d61bbb3SSatish Balay 
16702d61bbb3SSatish Balay   if (v) {
1671f4259b30SLisandro Dalcin     *v = NULL;
16722d61bbb3SSatish Balay     if (*nz) {
16739566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, v));
16742d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16752d61bbb3SSatish Balay         v_i  = *v + i * bs;
16762d61bbb3SSatish Balay         aa_i = aa + bs2 * (ai[bn] + i);
167726fbe8dcSKarl Rupp         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
16782d61bbb3SSatish Balay       }
16792d61bbb3SSatish Balay     }
16802d61bbb3SSatish Balay   }
16812d61bbb3SSatish Balay 
16822d61bbb3SSatish Balay   if (idx) {
1683f4259b30SLisandro Dalcin     *idx = NULL;
16842d61bbb3SSatish Balay     if (*nz) {
16859566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, idx));
16862d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16872d61bbb3SSatish Balay         idx_i = *idx + i * bs;
16882d61bbb3SSatish Balay         itmp  = bs * aj[ai[bn] + i];
168926fbe8dcSKarl Rupp         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
16902d61bbb3SSatish Balay       }
16912d61bbb3SSatish Balay     }
16922d61bbb3SSatish Balay   }
16933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16942d61bbb3SSatish Balay }
16952d61bbb3SSatish Balay 
1696d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1697d71ae5a4SJacob Faibussowitsch {
169852768537SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
169952768537SHong Zhang 
170052768537SHong Zhang   PetscFunctionBegin;
17019566063dSJacob Faibussowitsch   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
17023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
170352768537SHong Zhang }
170452768537SHong Zhang 
1705d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1706d71ae5a4SJacob Faibussowitsch {
17072d61bbb3SSatish Balay   PetscFunctionBegin;
1708cb4a9cd9SHong Zhang   if (nz) *nz = 0;
17099566063dSJacob Faibussowitsch   if (idx) PetscCall(PetscFree(*idx));
17109566063dSJacob Faibussowitsch   if (v) PetscCall(PetscFree(*v));
17113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17122d61bbb3SSatish Balay }
17132d61bbb3SSatish Balay 
171466976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1715d71ae5a4SJacob Faibussowitsch {
171620e84f26SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
17172d61bbb3SSatish Balay   Mat          C;
171820e84f26SHong Zhang   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
171920e84f26SHong Zhang   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
172020e84f26SHong Zhang   MatScalar   *ata, *aa = a->a;
17212d61bbb3SSatish Balay 
17222d61bbb3SSatish Balay   PetscFunctionBegin;
17237fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
17249566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1725cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
172620e84f26SHong Zhang     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
17272d61bbb3SSatish Balay 
17289566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
17299566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
17309566063dSJacob Faibussowitsch     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
17319566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
173220e84f26SHong Zhang 
173320e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
173420e84f26SHong Zhang     ati = at->i;
173520e84f26SHong Zhang     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1736fc4dec0aSBarry Smith   } else {
1737fc4dec0aSBarry Smith     C   = *B;
173820e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
173920e84f26SHong Zhang     ati = at->i;
1740fc4dec0aSBarry Smith   }
1741fc4dec0aSBarry Smith 
174220e84f26SHong Zhang   atj = at->j;
174320e84f26SHong Zhang   ata = at->a;
174420e84f26SHong Zhang 
174520e84f26SHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
17469566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(atfill, ati, nbs));
174720e84f26SHong Zhang 
174820e84f26SHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
17492d61bbb3SSatish Balay   for (i = 0; i < mbs; i++) {
175020e84f26SHong Zhang     anzj = ai[i + 1] - ai[i];
175120e84f26SHong Zhang     for (j = 0; j < anzj; j++) {
175220e84f26SHong Zhang       atj[atfill[*aj]] = i;
175320e84f26SHong Zhang       for (kr = 0; kr < bs; kr++) {
1754ad540459SPierre Jolivet         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
17552d61bbb3SSatish Balay       }
175620e84f26SHong Zhang       atfill[*aj++] += 1;
175720e84f26SHong Zhang     }
175820e84f26SHong Zhang   }
17599566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
17609566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
17612d61bbb3SSatish Balay 
176220e84f26SHong Zhang   /* Clean up temporary space and complete requests. */
17639566063dSJacob Faibussowitsch   PetscCall(PetscFree(atfill));
176420e84f26SHong Zhang 
1765cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
17669566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
17672d61bbb3SSatish Balay     *B = C;
17682d61bbb3SSatish Balay   } else {
17699566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &C));
17702d61bbb3SSatish Balay   }
17713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17722d61bbb3SSatish Balay }
17732d61bbb3SSatish Balay 
1774ff6a9541SJacob Faibussowitsch static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1775d71ae5a4SJacob Faibussowitsch {
1776453d3561SHong Zhang   Mat Btrans;
1777453d3561SHong Zhang 
1778453d3561SHong Zhang   PetscFunctionBegin;
1779453d3561SHong Zhang   *f = PETSC_FALSE;
1780acd337a6SBarry Smith   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
17819566063dSJacob Faibussowitsch   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
17829566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Btrans));
17833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1784453d3561SHong Zhang }
1785453d3561SHong Zhang 
1786618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1787d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1788d71ae5a4SJacob Faibussowitsch {
1789b51a4376SLisandro Dalcin   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1790b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1791b51a4376SLisandro Dalcin   PetscInt    *rowlens, *colidxs;
1792b51a4376SLisandro Dalcin   PetscScalar *matvals;
17932593348eSBarry Smith 
17943a40ed3dSBarry Smith   PetscFunctionBegin;
17959566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
17963b2fbd54SBarry Smith 
1797b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1798b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1799b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1800b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1801b51a4376SLisandro Dalcin   nz = bs * bs * A->nz;
18022593348eSBarry Smith 
1803b51a4376SLisandro Dalcin   /* write matrix header */
1804b51a4376SLisandro Dalcin   header[0] = MAT_FILE_CLASSID;
18059371c9d4SSatish Balay   header[1] = M;
18069371c9d4SSatish Balay   header[2] = N;
18079371c9d4SSatish Balay   header[3] = nz;
18089566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
18092593348eSBarry Smith 
1810b51a4376SLisandro Dalcin   /* store row lengths */
18119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rowlens));
1812b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
18139371c9d4SSatish Balay     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
18149566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
18159566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
1816b51a4376SLisandro Dalcin 
1817b51a4376SLisandro Dalcin   /* store column indices  */
18189566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &colidxs));
1819b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1820b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1821b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
18229371c9d4SSatish Balay         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
18235f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
18249566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
18259566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
18262593348eSBarry Smith 
18272593348eSBarry Smith   /* store nonzero values */
18289566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &matvals));
1829b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1830b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1831b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
18329371c9d4SSatish Balay         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
18335f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
18349566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
18359566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
1836ce6f0cecSBarry Smith 
1837b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
18389566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
18393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18402593348eSBarry Smith }
18412593348eSBarry Smith 
1842d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1843d71ae5a4SJacob Faibussowitsch {
18447dc0baabSHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
18457dc0baabSHong Zhang   PetscInt     i, bs = A->rmap->bs, k;
18467dc0baabSHong Zhang 
18477dc0baabSHong Zhang   PetscFunctionBegin;
18489566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
18497dc0baabSHong Zhang   for (i = 0; i < a->mbs; i++) {
18509566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
185148a46eb9SPierre Jolivet     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
18529566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18537dc0baabSHong Zhang   }
18549566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18567dc0baabSHong Zhang }
18577dc0baabSHong Zhang 
1858d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1859d71ae5a4SJacob Faibussowitsch {
1860b6490206SBarry Smith   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1861d0f46423SBarry Smith   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1862f3ef73ceSBarry Smith   PetscViewerFormat format;
18632593348eSBarry Smith 
18643a40ed3dSBarry Smith   PetscFunctionBegin;
18657dc0baabSHong Zhang   if (A->structure_only) {
18669566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
18673ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
18687dc0baabSHong Zhang   }
18697dc0baabSHong Zhang 
18709566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
1871456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
18729566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1873fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1874ade3a672SBarry Smith     const char *matname;
1875bcd9e38bSBarry Smith     Mat         aij;
18769566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
18779566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
18789566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
18799566063dSJacob Faibussowitsch     PetscCall(MatView(aij, viewer));
18809566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&aij));
188104929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
18823ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1883fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
18849566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
188544cd7ae7SLois Curfman McInnes     for (i = 0; i < a->mbs; i++) {
188644cd7ae7SLois Curfman McInnes       for (j = 0; j < bs; j++) {
18879566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
188844cd7ae7SLois Curfman McInnes         for (k = a->i[i]; k < a->i[i + 1]; k++) {
188944cd7ae7SLois Curfman McInnes           for (l = 0; l < bs; l++) {
1890aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18910e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18929371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18930e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18949371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18950e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18969566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
18970ef38995SBarry Smith             }
189844cd7ae7SLois Curfman McInnes #else
189948a46eb9SPierre Jolivet             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
190044cd7ae7SLois Curfman McInnes #endif
190144cd7ae7SLois Curfman McInnes           }
190244cd7ae7SLois Curfman McInnes         }
19039566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
190444cd7ae7SLois Curfman McInnes       }
190544cd7ae7SLois Curfman McInnes     }
19069566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
19070ef38995SBarry Smith   } else {
19089566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1909b6490206SBarry Smith     for (i = 0; i < a->mbs; i++) {
1910b6490206SBarry Smith       for (j = 0; j < bs; j++) {
19119566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1912b6490206SBarry Smith         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1913b6490206SBarry Smith           for (l = 0; l < bs; l++) {
1914aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
19150e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
19169371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
19170e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
19189371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
19190ef38995SBarry Smith             } else {
19209566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
192188685aaeSLois Curfman McInnes             }
192288685aaeSLois Curfman McInnes #else
19239566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
192488685aaeSLois Curfman McInnes #endif
19252593348eSBarry Smith           }
19262593348eSBarry Smith         }
19279566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
19282593348eSBarry Smith       }
19292593348eSBarry Smith     }
19309566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1931b6490206SBarry Smith   }
19329566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
19333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19342593348eSBarry Smith }
19352593348eSBarry Smith 
19369804daf3SBarry Smith #include <petscdraw.h>
1937d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1938d71ae5a4SJacob Faibussowitsch {
193977ed5343SBarry Smith   Mat               A = (Mat)Aa;
19403270192aSSatish Balay   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1941d0f46423SBarry Smith   PetscInt          row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2;
19420e6d2581SBarry Smith   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
19433f1db9ecSBarry Smith   MatScalar        *aa;
1944b0a32e0cSBarry Smith   PetscViewer       viewer;
1945b3e7f47fSJed Brown   PetscViewerFormat format;
19463270192aSSatish Balay 
19473a40ed3dSBarry Smith   PetscFunctionBegin;
19489566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
19499566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
19509566063dSJacob Faibussowitsch   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
195177ed5343SBarry Smith 
19523270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1953b3e7f47fSJed Brown 
1954b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1955d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1956383922c3SLisandro Dalcin     /* Blue for negative, Cyan for zero and  Red for positive */
1957b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
19583270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19593270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19609371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19619371c9d4SSatish Balay         y_r = y_l + 1.0;
19629371c9d4SSatish Balay         x_l = a->j[j] * bs;
19639371c9d4SSatish Balay         x_r = x_l + 1.0;
19643270192aSSatish Balay         aa  = a->a + j * bs2;
19653270192aSSatish Balay         for (k = 0; k < bs; k++) {
19663270192aSSatish Balay           for (l = 0; l < bs; l++) {
19670e6d2581SBarry Smith             if (PetscRealPart(*aa++) >= 0.) continue;
19689566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19693270192aSSatish Balay           }
19703270192aSSatish Balay         }
19713270192aSSatish Balay       }
19723270192aSSatish Balay     }
1973b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
19743270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19753270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19769371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19779371c9d4SSatish Balay         y_r = y_l + 1.0;
19789371c9d4SSatish Balay         x_l = a->j[j] * bs;
19799371c9d4SSatish Balay         x_r = x_l + 1.0;
19803270192aSSatish Balay         aa  = a->a + j * bs2;
19813270192aSSatish Balay         for (k = 0; k < bs; k++) {
19823270192aSSatish Balay           for (l = 0; l < bs; l++) {
19830e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
19849566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19853270192aSSatish Balay           }
19863270192aSSatish Balay         }
19873270192aSSatish Balay       }
19883270192aSSatish Balay     }
1989b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
19903270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19913270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19929371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19939371c9d4SSatish Balay         y_r = y_l + 1.0;
19949371c9d4SSatish Balay         x_l = a->j[j] * bs;
19959371c9d4SSatish Balay         x_r = x_l + 1.0;
19963270192aSSatish Balay         aa  = a->a + j * bs2;
19973270192aSSatish Balay         for (k = 0; k < bs; k++) {
19983270192aSSatish Balay           for (l = 0; l < bs; l++) {
19990e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
20009566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
20013270192aSSatish Balay           }
20023270192aSSatish Balay         }
20033270192aSSatish Balay       }
20043270192aSSatish Balay     }
2005d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
2006b3e7f47fSJed Brown   } else {
2007b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
2008b3e7f47fSJed Brown     /* first determine max of all nonzero values */
2009b05fc000SLisandro Dalcin     PetscReal minv = 0.0, maxv = 0.0;
2010b3e7f47fSJed Brown     PetscDraw popup;
2011b3e7f47fSJed Brown 
2012b3e7f47fSJed Brown     for (i = 0; i < a->nz * a->bs2; i++) {
2013b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
2014b3e7f47fSJed Brown     }
2015383922c3SLisandro Dalcin     if (minv >= maxv) maxv = minv + PETSC_SMALL;
20169566063dSJacob Faibussowitsch     PetscCall(PetscDrawGetPopup(draw, &popup));
20179566063dSJacob Faibussowitsch     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
2018383922c3SLisandro Dalcin 
2019d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
2020b3e7f47fSJed Brown     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2021b3e7f47fSJed Brown       for (j = a->i[i]; j < a->i[i + 1]; j++) {
20229371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
20239371c9d4SSatish Balay         y_r = y_l + 1.0;
20249371c9d4SSatish Balay         x_l = a->j[j] * bs;
20259371c9d4SSatish Balay         x_r = x_l + 1.0;
2026b3e7f47fSJed Brown         aa  = a->a + j * bs2;
2027b3e7f47fSJed Brown         for (k = 0; k < bs; k++) {
2028b3e7f47fSJed Brown           for (l = 0; l < bs; l++) {
2029383922c3SLisandro Dalcin             MatScalar v = *aa++;
2030383922c3SLisandro Dalcin             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
20319566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2032b3e7f47fSJed Brown           }
2033b3e7f47fSJed Brown         }
2034b3e7f47fSJed Brown       }
2035b3e7f47fSJed Brown     }
2036d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
2037b3e7f47fSJed Brown   }
20383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
203977ed5343SBarry Smith }
20403270192aSSatish Balay 
2041d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2042d71ae5a4SJacob Faibussowitsch {
20430e6d2581SBarry Smith   PetscReal xl, yl, xr, yr, w, h;
2044b0a32e0cSBarry Smith   PetscDraw draw;
2045ace3abfcSBarry Smith   PetscBool isnull;
20463270192aSSatish Balay 
204777ed5343SBarry Smith   PetscFunctionBegin;
20489566063dSJacob Faibussowitsch   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
20499566063dSJacob Faibussowitsch   PetscCall(PetscDrawIsNull(draw, &isnull));
20503ba16761SJacob Faibussowitsch   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
205177ed5343SBarry Smith 
20529371c9d4SSatish Balay   xr = A->cmap->n;
20539371c9d4SSatish Balay   yr = A->rmap->N;
20549371c9d4SSatish Balay   h  = yr / 10.0;
20559371c9d4SSatish Balay   w  = xr / 10.0;
20569371c9d4SSatish Balay   xr += w;
20579371c9d4SSatish Balay   yr += h;
20589371c9d4SSatish Balay   xl = -w;
20599371c9d4SSatish Balay   yl = -h;
20609566063dSJacob Faibussowitsch   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
20619566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
20629566063dSJacob Faibussowitsch   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
20639566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
20649566063dSJacob Faibussowitsch   PetscCall(PetscDrawSave(draw));
20653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20663270192aSSatish Balay }
20673270192aSSatish Balay 
2068d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2069d71ae5a4SJacob Faibussowitsch {
2070ace3abfcSBarry Smith   PetscBool iascii, isbinary, isdraw;
20712593348eSBarry Smith 
20723a40ed3dSBarry Smith   PetscFunctionBegin;
20739566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
20749566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
20759566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
207632077d6dSBarry Smith   if (iascii) {
20779566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
20780f5bd95cSBarry Smith   } else if (isbinary) {
20799566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
20800f5bd95cSBarry Smith   } else if (isdraw) {
20819566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
20825cd90555SBarry Smith   } else {
2083a5e6ed63SBarry Smith     Mat B;
20849566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
20859566063dSJacob Faibussowitsch     PetscCall(MatView(B, viewer));
20869566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
20872593348eSBarry Smith   }
20883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20892593348eSBarry Smith }
2090b6490206SBarry Smith 
2091d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2092d71ae5a4SJacob Faibussowitsch {
2093cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2094c1ac3661SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2095c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
2096d0f46423SBarry Smith   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
209797e567efSBarry Smith   MatScalar   *ap, *aa = a->a;
2098cd0e1443SSatish Balay 
20993a40ed3dSBarry Smith   PetscFunctionBegin;
21002d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over rows */
21019371c9d4SSatish Balay     row  = im[k];
21029371c9d4SSatish Balay     brow = row / bs;
21039371c9d4SSatish Balay     if (row < 0) {
21049371c9d4SSatish Balay       v += n;
21059371c9d4SSatish Balay       continue;
21069371c9d4SSatish Balay     } /* negative row */
210754c59aa7SJacob Faibussowitsch     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2108d29f2997SMatthew Woehlke     rp   = aj ? aj + ai[brow] : NULL;       /* mustn't add to NULL, that is UB */
2109d29f2997SMatthew Woehlke     ap   = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */
21102c3acbe9SBarry Smith     nrow = ailen[brow];
21112d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over columns */
21129371c9d4SSatish Balay       if (in[l] < 0) {
21139371c9d4SSatish Balay         v++;
21149371c9d4SSatish Balay         continue;
21159371c9d4SSatish Balay       } /* negative column */
211654c59aa7SJacob Faibussowitsch       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
21172d61bbb3SSatish Balay       col  = in[l];
21182d61bbb3SSatish Balay       bcol = col / bs;
21192d61bbb3SSatish Balay       cidx = col % bs;
21202d61bbb3SSatish Balay       ridx = row % bs;
21212d61bbb3SSatish Balay       high = nrow;
21222d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
21232d61bbb3SSatish Balay       while (high - low > 5) {
2124cd0e1443SSatish Balay         t = (low + high) / 2;
2125cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
2126cd0e1443SSatish Balay         else low = t;
2127cd0e1443SSatish Balay       }
2128cd0e1443SSatish Balay       for (i = low; i < high; i++) {
2129cd0e1443SSatish Balay         if (rp[i] > bcol) break;
2130cd0e1443SSatish Balay         if (rp[i] == bcol) {
21312d61bbb3SSatish Balay           *v++ = ap[bs2 * i + bs * cidx + ridx];
21322d61bbb3SSatish Balay           goto finished;
2133cd0e1443SSatish Balay         }
2134cd0e1443SSatish Balay       }
213597e567efSBarry Smith       *v++ = 0.0;
21362d61bbb3SSatish Balay     finished:;
2137cd0e1443SSatish Balay     }
2138cd0e1443SSatish Balay   }
21393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2140cd0e1443SSatish Balay }
2141cd0e1443SSatish Balay 
2142d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2143d71ae5a4SJacob Faibussowitsch {
214492c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2145e2ee6c50SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2146c1ac3661SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2147d0f46423SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2148ace3abfcSBarry Smith   PetscBool          roworiented = a->roworiented;
2149dd6ea824SBarry Smith   const PetscScalar *value       = v;
21509d243f67SHong Zhang   MatScalar         *ap = NULL, *aa = a->a, *bap;
215192c4ed94SBarry Smith 
21523a40ed3dSBarry Smith   PetscFunctionBegin;
21530e324ae4SSatish Balay   if (roworiented) {
21540e324ae4SSatish Balay     stepval = (n - 1) * bs;
21550e324ae4SSatish Balay   } else {
21560e324ae4SSatish Balay     stepval = (m - 1) * bs;
21570e324ae4SSatish Balay   }
215892c4ed94SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
215992c4ed94SBarry Smith     row = im[k];
21605ef9f2a5SBarry Smith     if (row < 0) continue;
21616bdcaf15SBarry Smith     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
216292c4ed94SBarry Smith     rp = aj + ai[row];
21637dc0baabSHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[row];
216492c4ed94SBarry Smith     rmax = imax[row];
216592c4ed94SBarry Smith     nrow = ailen[row];
216692c4ed94SBarry Smith     low  = 0;
2167c71e6ed7SBarry Smith     high = nrow;
216892c4ed94SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
21695ef9f2a5SBarry Smith       if (in[l] < 0) continue;
21706bdcaf15SBarry Smith       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
217192c4ed94SBarry Smith       col = in[l];
21727dc0baabSHong Zhang       if (!A->structure_only) {
217392c4ed94SBarry Smith         if (roworiented) {
217453ef36baSBarry Smith           value = v + (k * (stepval + bs) + l) * bs;
21750e324ae4SSatish Balay         } else {
217653ef36baSBarry Smith           value = v + (l * (stepval + bs) + k) * bs;
217792c4ed94SBarry Smith         }
21787dc0baabSHong Zhang       }
217926fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
218026fbe8dcSKarl Rupp       else high = nrow;
2181e2ee6c50SBarry Smith       lastcol = col;
218292c4ed94SBarry Smith       while (high - low > 7) {
218392c4ed94SBarry Smith         t = (low + high) / 2;
218492c4ed94SBarry Smith         if (rp[t] > col) high = t;
218592c4ed94SBarry Smith         else low = t;
218692c4ed94SBarry Smith       }
218792c4ed94SBarry Smith       for (i = low; i < high; i++) {
218892c4ed94SBarry Smith         if (rp[i] > col) break;
218992c4ed94SBarry Smith         if (rp[i] == col) {
21907dc0baabSHong Zhang           if (A->structure_only) goto noinsert2;
21918a84c255SSatish Balay           bap = ap + bs2 * i;
21920e324ae4SSatish Balay           if (roworiented) {
21938a84c255SSatish Balay             if (is == ADD_VALUES) {
2194dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2195ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2196dd9472c6SBarry Smith               }
21970e324ae4SSatish Balay             } else {
2198dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2199ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2200dd9472c6SBarry Smith               }
2201dd9472c6SBarry Smith             }
22020e324ae4SSatish Balay           } else {
22030e324ae4SSatish Balay             if (is == ADD_VALUES) {
220453ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2205ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
220653ef36baSBarry Smith                 bap += bs;
2207dd9472c6SBarry Smith               }
22080e324ae4SSatish Balay             } else {
220953ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2210ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
221153ef36baSBarry Smith                 bap += bs;
22128a84c255SSatish Balay               }
2213dd9472c6SBarry Smith             }
2214dd9472c6SBarry Smith           }
2215f1241b54SBarry Smith           goto noinsert2;
221692c4ed94SBarry Smith         }
221792c4ed94SBarry Smith       }
221889280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
22195f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
22207dc0baabSHong Zhang       if (A->structure_only) {
22217dc0baabSHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
22227dc0baabSHong Zhang       } else {
2223fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
22247dc0baabSHong Zhang       }
22259371c9d4SSatish Balay       N = nrow++ - 1;
22269371c9d4SSatish Balay       high++;
222792c4ed94SBarry Smith       /* shift up all the later entries in this row */
22289566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
222992c4ed94SBarry Smith       rp[i] = col;
22307dc0baabSHong Zhang       if (!A->structure_only) {
22319566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
22328a84c255SSatish Balay         bap = ap + bs2 * i;
22330e324ae4SSatish Balay         if (roworiented) {
2234dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2235ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2236dd9472c6SBarry Smith           }
22370e324ae4SSatish Balay         } else {
2238dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2239ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2240dd9472c6SBarry Smith           }
2241dd9472c6SBarry Smith         }
22427dc0baabSHong Zhang       }
2243f1241b54SBarry Smith     noinsert2:;
224492c4ed94SBarry Smith       low = i;
224592c4ed94SBarry Smith     }
224692c4ed94SBarry Smith     ailen[row] = nrow;
224792c4ed94SBarry Smith   }
22483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
224992c4ed94SBarry Smith }
225026e093fcSHong Zhang 
2251d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2252d71ae5a4SJacob Faibussowitsch {
2253584200bdSSatish Balay   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2254580bdb30SBarry Smith   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2255d0f46423SBarry Smith   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2256c1ac3661SBarry Smith   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
22573f1db9ecSBarry Smith   MatScalar   *aa    = a->a, *ap;
22583447b6efSHong Zhang   PetscReal    ratio = 0.6;
2259584200bdSSatish Balay 
22603a40ed3dSBarry Smith   PetscFunctionBegin;
2261d32568d8SPierre Jolivet   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2262584200bdSSatish Balay 
226343ee02c3SBarry Smith   if (m) rmax = ailen[0];
2264584200bdSSatish Balay   for (i = 1; i < mbs; i++) {
2265584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
2266584200bdSSatish Balay     fshift += imax[i - 1] - ailen[i - 1];
2267d402145bSBarry Smith     rmax = PetscMax(rmax, ailen[i]);
2268584200bdSSatish Balay     if (fshift) {
2269580bdb30SBarry Smith       ip = aj + ai[i];
2270580bdb30SBarry Smith       ap = aa + bs2 * ai[i];
2271584200bdSSatish Balay       N  = ailen[i];
22729566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(ip - fshift, ip, N));
227348a46eb9SPierre Jolivet       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2274672ba085SHong Zhang     }
2275584200bdSSatish Balay     ai[i] = ai[i - 1] + ailen[i - 1];
2276584200bdSSatish Balay   }
2277584200bdSSatish Balay   if (mbs) {
2278584200bdSSatish Balay     fshift += imax[mbs - 1] - ailen[mbs - 1];
2279584200bdSSatish Balay     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2280584200bdSSatish Balay   }
22817c565772SBarry Smith 
2282584200bdSSatish Balay   /* reset ilen and imax for each row */
22837c565772SBarry Smith   a->nonzerorowcnt = 0;
2284672ba085SHong Zhang   if (A->structure_only) {
22859566063dSJacob Faibussowitsch     PetscCall(PetscFree2(a->imax, a->ilen));
2286672ba085SHong Zhang   } else { /* !A->structure_only */
2287584200bdSSatish Balay     for (i = 0; i < mbs; i++) {
2288584200bdSSatish Balay       ailen[i] = imax[i] = ai[i + 1] - ai[i];
22897c565772SBarry Smith       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2290584200bdSSatish Balay     }
2291672ba085SHong Zhang   }
2292a7c10996SSatish Balay   a->nz = ai[mbs];
2293584200bdSSatish Balay 
2294584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
2295b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
2296ff6a9541SJacob Faibussowitsch   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
22975f80ce2aSJacob Faibussowitsch   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
22989566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
22999566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
23009566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
230126fbe8dcSKarl Rupp 
23028e58a170SBarry Smith   A->info.mallocs += a->reallocs;
2303e2f3b5e9SSatish Balay   a->reallocs         = 0;
23040e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2305647a6520SHong Zhang   a->rmax             = rmax;
2306cf4441caSHong Zhang 
230748a46eb9SPierre Jolivet   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
23083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2309584200bdSSatish Balay }
2310584200bdSSatish Balay 
2311bea157c4SSatish Balay /*
2312bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
2313bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2314a5b23f4aSJose E. Roman    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2315bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
2316bea157c4SSatish Balay */
2317d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2318d71ae5a4SJacob Faibussowitsch {
2319ff6a9541SJacob Faibussowitsch   PetscInt j = 0;
23203a40ed3dSBarry Smith 
2321433994e6SBarry Smith   PetscFunctionBegin;
2322ff6a9541SJacob Faibussowitsch   for (PetscInt i = 0; i < n; j++) {
2323ff6a9541SJacob Faibussowitsch     PetscInt row = idx[i];
2324a5b23f4aSJose E. Roman     if (row % bs != 0) { /* Not the beginning of a block */
2325bea157c4SSatish Balay       sizes[j] = 1;
2326bea157c4SSatish Balay       i++;
2327e4fda26cSSatish Balay     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2328bea157c4SSatish Balay       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2329bea157c4SSatish Balay       i++;
23306aad120cSJose E. Roman     } else { /* Beginning of the block, so check if the complete block exists */
2331ff6a9541SJacob Faibussowitsch       PetscBool flg = PETSC_TRUE;
2332ff6a9541SJacob Faibussowitsch       for (PetscInt k = 1; k < bs; k++) {
2333bea157c4SSatish Balay         if (row + k != idx[i + k]) { /* break in the block */
2334bea157c4SSatish Balay           flg = PETSC_FALSE;
2335bea157c4SSatish Balay           break;
2336d9b7c43dSSatish Balay         }
2337bea157c4SSatish Balay       }
2338abc0a331SBarry Smith       if (flg) { /* No break in the bs */
2339bea157c4SSatish Balay         sizes[j] = bs;
2340bea157c4SSatish Balay         i += bs;
2341bea157c4SSatish Balay       } else {
2342bea157c4SSatish Balay         sizes[j] = 1;
2343bea157c4SSatish Balay         i++;
2344bea157c4SSatish Balay       }
2345bea157c4SSatish Balay     }
2346bea157c4SSatish Balay   }
2347bea157c4SSatish Balay   *bs_max = j;
23483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2349d9b7c43dSSatish Balay }
2350d9b7c43dSSatish Balay 
2351d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2352d71ae5a4SJacob Faibussowitsch {
2353d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2354f4df32b1SMatthew Knepley   PetscInt           i, j, k, count, *rows;
2355d0f46423SBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
235687828ca2SBarry Smith   PetscScalar        zero = 0.0;
23573f1db9ecSBarry Smith   MatScalar         *aa;
235897b48c8fSBarry Smith   const PetscScalar *xx;
235997b48c8fSBarry Smith   PetscScalar       *bb;
2360d9b7c43dSSatish Balay 
23613a40ed3dSBarry Smith   PetscFunctionBegin;
236297b48c8fSBarry Smith   /* fix right hand side if needed */
236397b48c8fSBarry Smith   if (x && b) {
23649566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
23659566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
2366ad540459SPierre Jolivet     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
23679566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
23689566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
236997b48c8fSBarry Smith   }
237097b48c8fSBarry Smith 
2371d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2372bea157c4SSatish Balay   /* allocate memory for rows,sizes */
23739566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2374bea157c4SSatish Balay 
2375563b5814SBarry Smith   /* copy IS values to rows, and sort them */
237626fbe8dcSKarl Rupp   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
23779566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(is_n, rows));
237897b48c8fSBarry Smith 
2379a9817697SBarry Smith   if (baij->keepnonzeropattern) {
238026fbe8dcSKarl Rupp     for (i = 0; i < is_n; i++) sizes[i] = 1;
2381dffd3267SBarry Smith     bs_max = is_n;
2382dffd3267SBarry Smith   } else {
23839566063dSJacob Faibussowitsch     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2384e56f5c9eSBarry Smith     A->nonzerostate++;
2385dffd3267SBarry Smith   }
2386bea157c4SSatish Balay 
2387bea157c4SSatish Balay   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2388bea157c4SSatish Balay     row = rows[j];
23895f80ce2aSJacob Faibussowitsch     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2390bea157c4SSatish Balay     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2391b31fbe3bSSatish Balay     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2392a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2393d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2394bea157c4SSatish Balay         if (baij->ilen[row / bs] > 0) {
2395bea157c4SSatish Balay           baij->ilen[row / bs]       = 1;
2396bea157c4SSatish Balay           baij->j[baij->i[row / bs]] = row / bs;
239726fbe8dcSKarl Rupp 
23989566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(aa, count * bs));
2399a07cd24cSSatish Balay         }
2400563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
240148a46eb9SPierre Jolivet         for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES));
2402f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2403bea157c4SSatish Balay         baij->ilen[row / bs] = 0;
2404f4df32b1SMatthew Knepley       }      /* end (diag == 0.0) */
2405bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
24066bdcaf15SBarry Smith       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2407bea157c4SSatish Balay       for (k = 0; k < count; k++) {
2408d9b7c43dSSatish Balay         aa[0] = zero;
2409d9b7c43dSSatish Balay         aa += bs;
2410d9b7c43dSSatish Balay       }
241148a46eb9SPierre Jolivet       if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES));
2412d9b7c43dSSatish Balay     }
2413bea157c4SSatish Balay   }
2414bea157c4SSatish Balay 
24159566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, sizes));
24169566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
24173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2418d9b7c43dSSatish Balay }
24191c351548SSatish Balay 
2420ff6a9541SJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2421d71ae5a4SJacob Faibussowitsch {
242297b48c8fSBarry Smith   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
242397b48c8fSBarry Smith   PetscInt           i, j, k, count;
242497b48c8fSBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
242597b48c8fSBarry Smith   PetscScalar        zero = 0.0;
242697b48c8fSBarry Smith   MatScalar         *aa;
242797b48c8fSBarry Smith   const PetscScalar *xx;
242897b48c8fSBarry Smith   PetscScalar       *bb;
242956777dd2SBarry Smith   PetscBool         *zeroed, vecs = PETSC_FALSE;
243097b48c8fSBarry Smith 
243197b48c8fSBarry Smith   PetscFunctionBegin;
243297b48c8fSBarry Smith   /* fix right hand side if needed */
243397b48c8fSBarry Smith   if (x && b) {
24349566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
24359566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
243656777dd2SBarry Smith     vecs = PETSC_TRUE;
243797b48c8fSBarry Smith   }
243897b48c8fSBarry Smith 
243997b48c8fSBarry Smith   /* zero the columns */
24409566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
244197b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
24425f80ce2aSJacob Faibussowitsch     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
244397b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
244497b48c8fSBarry Smith   }
244597b48c8fSBarry Smith   for (i = 0; i < A->rmap->N; i++) {
244697b48c8fSBarry Smith     if (!zeroed[i]) {
244797b48c8fSBarry Smith       row = i / bs;
244897b48c8fSBarry Smith       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
244997b48c8fSBarry Smith         for (k = 0; k < bs; k++) {
245097b48c8fSBarry Smith           col = bs * baij->j[j] + k;
245197b48c8fSBarry Smith           if (zeroed[col]) {
245297b48c8fSBarry Smith             aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
245356777dd2SBarry Smith             if (vecs) bb[i] -= aa[0] * xx[col];
245497b48c8fSBarry Smith             aa[0] = 0.0;
245597b48c8fSBarry Smith           }
245697b48c8fSBarry Smith         }
245797b48c8fSBarry Smith       }
245856777dd2SBarry Smith     } else if (vecs) bb[i] = diag * xx[i];
245997b48c8fSBarry Smith   }
24609566063dSJacob Faibussowitsch   PetscCall(PetscFree(zeroed));
246156777dd2SBarry Smith   if (vecs) {
24629566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
24639566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
246456777dd2SBarry Smith   }
246597b48c8fSBarry Smith 
246697b48c8fSBarry Smith   /* zero the rows */
246797b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
246897b48c8fSBarry Smith     row   = is_idx[i];
246997b48c8fSBarry Smith     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
247097b48c8fSBarry Smith     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
247197b48c8fSBarry Smith     for (k = 0; k < count; k++) {
247297b48c8fSBarry Smith       aa[0] = zero;
247397b48c8fSBarry Smith       aa += bs;
247497b48c8fSBarry Smith     }
2475dbbe0bcdSBarry Smith     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
247697b48c8fSBarry Smith   }
24779566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
24783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
247997b48c8fSBarry Smith }
248097b48c8fSBarry Smith 
2481d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2482d71ae5a4SJacob Faibussowitsch {
24832d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2484e2ee6c50SBarry Smith   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2485c1ac3661SBarry Smith   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2486d0f46423SBarry Smith   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2487c1ac3661SBarry Smith   PetscInt     ridx, cidx, bs2                 = a->bs2;
2488ace3abfcSBarry Smith   PetscBool    roworiented = a->roworiented;
2489d8cdefa3SHong Zhang   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
24902d61bbb3SSatish Balay 
24912d61bbb3SSatish Balay   PetscFunctionBegin;
24922d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over added rows */
2493085a36d4SBarry Smith     row  = im[k];
2494085a36d4SBarry Smith     brow = row / bs;
24955ef9f2a5SBarry Smith     if (row < 0) continue;
24966bdcaf15SBarry Smith     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
24972d61bbb3SSatish Balay     rp = aj + ai[brow];
2498672ba085SHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[brow];
24992d61bbb3SSatish Balay     rmax = imax[brow];
25002d61bbb3SSatish Balay     nrow = ailen[brow];
25012d61bbb3SSatish Balay     low  = 0;
2502c71e6ed7SBarry Smith     high = nrow;
25032d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over added columns */
25045ef9f2a5SBarry Smith       if (in[l] < 0) continue;
25056bdcaf15SBarry Smith       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
25069371c9d4SSatish Balay       col  = in[l];
25079371c9d4SSatish Balay       bcol = col / bs;
25089371c9d4SSatish Balay       ridx = row % bs;
25099371c9d4SSatish Balay       cidx = col % bs;
2510672ba085SHong Zhang       if (!A->structure_only) {
25112d61bbb3SSatish Balay         if (roworiented) {
25125ef9f2a5SBarry Smith           value = v[l + k * n];
25132d61bbb3SSatish Balay         } else {
25142d61bbb3SSatish Balay           value = v[k + l * m];
25152d61bbb3SSatish Balay         }
2516672ba085SHong Zhang       }
25179371c9d4SSatish Balay       if (col <= lastcol) low = 0;
25189371c9d4SSatish Balay       else high = nrow;
2519e2ee6c50SBarry Smith       lastcol = col;
25202d61bbb3SSatish Balay       while (high - low > 7) {
25212d61bbb3SSatish Balay         t = (low + high) / 2;
25222d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
25232d61bbb3SSatish Balay         else low = t;
25242d61bbb3SSatish Balay       }
25252d61bbb3SSatish Balay       for (i = low; i < high; i++) {
25262d61bbb3SSatish Balay         if (rp[i] > bcol) break;
25272d61bbb3SSatish Balay         if (rp[i] == bcol) {
25282d61bbb3SSatish Balay           bap = ap + bs2 * i + bs * cidx + ridx;
2529672ba085SHong Zhang           if (!A->structure_only) {
25302d61bbb3SSatish Balay             if (is == ADD_VALUES) *bap += value;
25312d61bbb3SSatish Balay             else *bap = value;
2532672ba085SHong Zhang           }
25332d61bbb3SSatish Balay           goto noinsert1;
25342d61bbb3SSatish Balay         }
25352d61bbb3SSatish Balay       }
25362d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
25375f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2538672ba085SHong Zhang       if (A->structure_only) {
2539672ba085SHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2540672ba085SHong Zhang       } else {
2541fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2542672ba085SHong Zhang       }
25439371c9d4SSatish Balay       N = nrow++ - 1;
25449371c9d4SSatish Balay       high++;
25452d61bbb3SSatish Balay       /* shift up all the later entries in this row */
25469566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
25472d61bbb3SSatish Balay       rp[i] = bcol;
2548580bdb30SBarry Smith       if (!A->structure_only) {
25499566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
25509566063dSJacob Faibussowitsch         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2551580bdb30SBarry Smith         ap[bs2 * i + bs * cidx + ridx] = value;
2552580bdb30SBarry Smith       }
2553085a36d4SBarry Smith       a->nz++;
2554e56f5c9eSBarry Smith       A->nonzerostate++;
25552d61bbb3SSatish Balay     noinsert1:;
25562d61bbb3SSatish Balay       low = i;
25572d61bbb3SSatish Balay     }
25582d61bbb3SSatish Balay     ailen[brow] = nrow;
25592d61bbb3SSatish Balay   }
25603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25612d61bbb3SSatish Balay }
25622d61bbb3SSatish Balay 
2563ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2564d71ae5a4SJacob Faibussowitsch {
25652d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
25662d61bbb3SSatish Balay   Mat          outA;
2567ace3abfcSBarry Smith   PetscBool    row_identity, col_identity;
25682d61bbb3SSatish Balay 
25692d61bbb3SSatish Balay   PetscFunctionBegin;
25705f80ce2aSJacob Faibussowitsch   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
25719566063dSJacob Faibussowitsch   PetscCall(ISIdentity(row, &row_identity));
25729566063dSJacob Faibussowitsch   PetscCall(ISIdentity(col, &col_identity));
25735f80ce2aSJacob Faibussowitsch   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
25742d61bbb3SSatish Balay 
25752d61bbb3SSatish Balay   outA            = inA;
2576d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
25779566063dSJacob Faibussowitsch   PetscCall(PetscFree(inA->solvertype));
25789566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
25792d61bbb3SSatish Balay 
25809566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2581cf242676SKris Buschelman 
25829566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)row));
25839566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
2584c3122656SLisandro Dalcin   a->row = row;
25859566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)col));
25869566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
2587c3122656SLisandro Dalcin   a->col = col;
2588c38d4ed2SBarry Smith 
2589c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
25909566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
25919566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2592c38d4ed2SBarry Smith 
25939566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2594aa624791SPierre Jolivet   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
25959566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(outA, inA, info));
25963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25972d61bbb3SSatish Balay }
2598d9b7c43dSSatish Balay 
2599ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2600d71ae5a4SJacob Faibussowitsch {
260127a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
260227a8da17SBarry Smith 
260327a8da17SBarry Smith   PetscFunctionBegin;
2604ff6a9541SJacob Faibussowitsch   baij->nz = baij->maxnz;
2605ff6a9541SJacob Faibussowitsch   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2606ff6a9541SJacob Faibussowitsch   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
26073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
260827a8da17SBarry Smith }
260927a8da17SBarry Smith 
261027a8da17SBarry Smith /*@
261111a5261eSBarry Smith   MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix.
261227a8da17SBarry Smith 
261327a8da17SBarry Smith   Input Parameters:
261411a5261eSBarry Smith + mat     - the `MATSEQBAIJ` matrix
261527a8da17SBarry Smith - indices - the column indices
261627a8da17SBarry Smith 
261715091d37SBarry Smith   Level: advanced
261815091d37SBarry Smith 
261927a8da17SBarry Smith   Notes:
262027a8da17SBarry Smith   This can be called if you have precomputed the nonzero structure of the
262127a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
262211a5261eSBarry Smith   of the `MatSetValues()` operation.
262327a8da17SBarry Smith 
262427a8da17SBarry Smith   You MUST have set the correct numbers of nonzeros per row in the call to
262511a5261eSBarry Smith   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
262627a8da17SBarry Smith 
262711a5261eSBarry Smith   MUST be called before any calls to `MatSetValues()`
262827a8da17SBarry Smith 
26291cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
263027a8da17SBarry Smith @*/
2631d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2632d71ae5a4SJacob Faibussowitsch {
263327a8da17SBarry Smith   PetscFunctionBegin;
26340700a824SBarry Smith   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
26354f572ea9SToby Isaac   PetscAssertPointer(indices, 2);
2636cac4c232SBarry Smith   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices));
26373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
263827a8da17SBarry Smith }
263927a8da17SBarry Smith 
264066976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2641d71ae5a4SJacob Faibussowitsch {
2642273d9f13SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2643c1ac3661SBarry Smith   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2644273d9f13SBarry Smith   PetscReal    atmp;
264587828ca2SBarry Smith   PetscScalar *x, zero = 0.0;
2646273d9f13SBarry Smith   MatScalar   *aa;
2647c1ac3661SBarry Smith   PetscInt     ncols, brow, krow, kcol;
2648273d9f13SBarry Smith 
2649273d9f13SBarry Smith   PetscFunctionBegin;
26505f80ce2aSJacob Faibussowitsch   /* why is this not a macro???????????????????????????????????????????????????????????????? */
26515f80ce2aSJacob Faibussowitsch   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2652d0f46423SBarry Smith   bs  = A->rmap->bs;
2653273d9f13SBarry Smith   aa  = a->a;
2654273d9f13SBarry Smith   ai  = a->i;
2655273d9f13SBarry Smith   aj  = a->j;
2656273d9f13SBarry Smith   mbs = a->mbs;
2657273d9f13SBarry Smith 
26589566063dSJacob Faibussowitsch   PetscCall(VecSet(v, zero));
26599566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
26609566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(v, &n));
26615f80ce2aSJacob Faibussowitsch   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2662273d9f13SBarry Smith   for (i = 0; i < mbs; i++) {
26639371c9d4SSatish Balay     ncols = ai[1] - ai[0];
26649371c9d4SSatish Balay     ai++;
2665273d9f13SBarry Smith     brow = bs * i;
2666273d9f13SBarry Smith     for (j = 0; j < ncols; j++) {
2667273d9f13SBarry Smith       for (kcol = 0; kcol < bs; kcol++) {
2668273d9f13SBarry Smith         for (krow = 0; krow < bs; krow++) {
26699371c9d4SSatish Balay           atmp = PetscAbsScalar(*aa);
26709371c9d4SSatish Balay           aa++;
2671273d9f13SBarry Smith           row = brow + krow; /* row index */
26729371c9d4SSatish Balay           if (PetscAbsScalar(x[row]) < atmp) {
26739371c9d4SSatish Balay             x[row] = atmp;
26749371c9d4SSatish Balay             if (idx) idx[row] = bs * (*aj) + kcol;
26759371c9d4SSatish Balay           }
2676273d9f13SBarry Smith         }
2677273d9f13SBarry Smith       }
2678273d9f13SBarry Smith       aj++;
2679273d9f13SBarry Smith     }
2680273d9f13SBarry Smith   }
26819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
26823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2683273d9f13SBarry Smith }
2684273d9f13SBarry Smith 
268566976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2686d71ae5a4SJacob Faibussowitsch {
26873c896bc6SHong Zhang   PetscFunctionBegin;
26883c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
26893c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
26903c896bc6SHong Zhang     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
26913c896bc6SHong Zhang     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2692d88c0aacSHong Zhang     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
26933c896bc6SHong Zhang 
26945f80ce2aSJacob Faibussowitsch     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
26955f80ce2aSJacob Faibussowitsch     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
26969566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
26979566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)B));
26983c896bc6SHong Zhang   } else {
26999566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
27003c896bc6SHong Zhang   }
27013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
27023c896bc6SHong Zhang }
27033c896bc6SHong Zhang 
2704d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2705d71ae5a4SJacob Faibussowitsch {
2706f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27076e111a19SKarl Rupp 
2708f2a5309cSSatish Balay   PetscFunctionBegin;
2709f2a5309cSSatish Balay   *array = a->a;
27103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2711f2a5309cSSatish Balay }
2712f2a5309cSSatish Balay 
2713d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2714d71ae5a4SJacob Faibussowitsch {
2715f2a5309cSSatish Balay   PetscFunctionBegin;
2716cda14afcSprj-   *array = NULL;
27173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2718f2a5309cSSatish Balay }
2719f2a5309cSSatish Balay 
2720d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2721d71ae5a4SJacob Faibussowitsch {
2722b264fe52SHong Zhang   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
272352768537SHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
272452768537SHong Zhang   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
272552768537SHong Zhang 
272652768537SHong Zhang   PetscFunctionBegin;
272752768537SHong Zhang   /* Set the number of nonzeros in the new matrix */
27289566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
27293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
273052768537SHong Zhang }
273152768537SHong Zhang 
2732d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2733d71ae5a4SJacob Faibussowitsch {
273442ee4b1aSHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
273531ce2d13SHong Zhang   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2736e838b9e7SJed Brown   PetscBLASInt one = 1;
273742ee4b1aSHong Zhang 
273842ee4b1aSHong Zhang   PetscFunctionBegin;
2739134adf20SPierre Jolivet   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2740134adf20SPierre Jolivet     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2741134adf20SPierre Jolivet     if (e) {
27429566063dSJacob Faibussowitsch       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2743134adf20SPierre Jolivet       if (e) {
27449566063dSJacob Faibussowitsch         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2745134adf20SPierre Jolivet         if (e) str = SAME_NONZERO_PATTERN;
2746134adf20SPierre Jolivet       }
2747134adf20SPierre Jolivet     }
274854c59aa7SJacob Faibussowitsch     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2749134adf20SPierre Jolivet   }
275042ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2751f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2752c5df96a5SBarry Smith     PetscBLASInt bnz;
27539566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2754792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
27559566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2756ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
27579566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
275842ee4b1aSHong Zhang   } else {
275952768537SHong Zhang     Mat       B;
276052768537SHong Zhang     PetscInt *nnz;
276154c59aa7SJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
27629566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
27639566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
27649566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
27659566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
27669566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
27679566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
27689566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
27699566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
27709566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
27719566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
27729566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
277342ee4b1aSHong Zhang   }
27743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
277542ee4b1aSHong Zhang }
277642ee4b1aSHong Zhang 
2777d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2778d71ae5a4SJacob Faibussowitsch {
2779ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
27802726fb6dSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27812726fb6dSPierre Jolivet   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
27822726fb6dSPierre Jolivet   MatScalar   *aa = a->a;
27832726fb6dSPierre Jolivet 
27842726fb6dSPierre Jolivet   PetscFunctionBegin;
27852726fb6dSPierre Jolivet   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
27863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2787ff6a9541SJacob Faibussowitsch #else
2788ff6a9541SJacob Faibussowitsch   (void)A;
2789ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2790ff6a9541SJacob Faibussowitsch #endif
27912726fb6dSPierre Jolivet }
27922726fb6dSPierre Jolivet 
2793ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2794d71ae5a4SJacob Faibussowitsch {
2795ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
279699cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
279799cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2798dd6ea824SBarry Smith   MatScalar   *aa = a->a;
279999cafbc1SBarry Smith 
280099cafbc1SBarry Smith   PetscFunctionBegin;
280199cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
28023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2803ff6a9541SJacob Faibussowitsch #else
2804ff6a9541SJacob Faibussowitsch   (void)A;
2805ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2806ff6a9541SJacob Faibussowitsch #endif
280799cafbc1SBarry Smith }
280899cafbc1SBarry Smith 
2809ff6a9541SJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2810d71ae5a4SJacob Faibussowitsch {
2811ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
281299cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
281399cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2814dd6ea824SBarry Smith   MatScalar   *aa = a->a;
281599cafbc1SBarry Smith 
281699cafbc1SBarry Smith   PetscFunctionBegin;
281799cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
28183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2819ff6a9541SJacob Faibussowitsch #else
2820ff6a9541SJacob Faibussowitsch   (void)A;
2821ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2822ff6a9541SJacob Faibussowitsch #endif
282399cafbc1SBarry Smith }
282499cafbc1SBarry Smith 
28253acb8795SBarry Smith /*
28262479783cSJose E. Roman     Code almost identical to MatGetColumnIJ_SeqAIJ(); the two should share common code.
28273acb8795SBarry Smith */
2828ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2829d71ae5a4SJacob Faibussowitsch {
28303acb8795SBarry Smith   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
28313acb8795SBarry Smith   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
28323acb8795SBarry Smith   PetscInt     nz = a->i[m], row, *jj, mr, col;
28333acb8795SBarry Smith 
28343acb8795SBarry Smith   PetscFunctionBegin;
28353acb8795SBarry Smith   *nn = n;
28363ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
28375f80ce2aSJacob Faibussowitsch   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
28389566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28409566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28413acb8795SBarry Smith   jj = a->j;
2842ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
28433acb8795SBarry Smith   cia[0] = oshift;
2844ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28459566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
28463acb8795SBarry Smith   jj = a->j;
28473acb8795SBarry Smith   for (row = 0; row < m; row++) {
28483acb8795SBarry Smith     mr = a->i[row + 1] - a->i[row];
28493acb8795SBarry Smith     for (i = 0; i < mr; i++) {
28503acb8795SBarry Smith       col = *jj++;
285126fbe8dcSKarl Rupp 
28523acb8795SBarry Smith       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
28533acb8795SBarry Smith     }
28543acb8795SBarry Smith   }
28559566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
28569371c9d4SSatish Balay   *ia = cia;
28579371c9d4SSatish Balay   *ja = cja;
28583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28593acb8795SBarry Smith }
28603acb8795SBarry Smith 
2861ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2862d71ae5a4SJacob Faibussowitsch {
28633acb8795SBarry Smith   PetscFunctionBegin;
28643ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
28659566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ia));
28669566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ja));
28673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28683acb8795SBarry Smith }
28693acb8795SBarry Smith 
2870525d23c0SHong Zhang /*
2871525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2872525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2873040ebd07SHong Zhang  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2874525d23c0SHong Zhang  */
2875d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2876d71ae5a4SJacob Faibussowitsch {
2877525d23c0SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2878c0349474SHong Zhang   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2879525d23c0SHong Zhang   PetscInt     nz = a->i[m], row, *jj, mr, col;
2880525d23c0SHong Zhang   PetscInt    *cspidx;
2881f6d58c54SBarry Smith 
2882f6d58c54SBarry Smith   PetscFunctionBegin;
2883525d23c0SHong Zhang   *nn = n;
28843ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2885f6d58c54SBarry Smith 
28869566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28879566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28889566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cspidx));
2890525d23c0SHong Zhang   jj = a->j;
2891ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2892525d23c0SHong Zhang   cia[0] = oshift;
2893ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28949566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
2895525d23c0SHong Zhang   jj = a->j;
2896525d23c0SHong Zhang   for (row = 0; row < m; row++) {
2897525d23c0SHong Zhang     mr = a->i[row + 1] - a->i[row];
2898525d23c0SHong Zhang     for (i = 0; i < mr; i++) {
2899525d23c0SHong Zhang       col                                         = *jj++;
2900525d23c0SHong Zhang       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2901525d23c0SHong Zhang       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2902525d23c0SHong Zhang     }
2903525d23c0SHong Zhang   }
29049566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
2905071fcb05SBarry Smith   *ia    = cia;
2906071fcb05SBarry Smith   *ja    = cja;
2907525d23c0SHong Zhang   *spidx = cspidx;
29083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2909f6d58c54SBarry Smith }
2910f6d58c54SBarry Smith 
2911d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2912d71ae5a4SJacob Faibussowitsch {
2913525d23c0SHong Zhang   PetscFunctionBegin;
29149566063dSJacob Faibussowitsch   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
29159566063dSJacob Faibussowitsch   PetscCall(PetscFree(*spidx));
29163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2917f6d58c54SBarry Smith }
291899cafbc1SBarry Smith 
291966976f2fSJacob Faibussowitsch static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2920d71ae5a4SJacob Faibussowitsch {
29217d68702bSBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
29227d68702bSBarry Smith 
29237d68702bSBarry Smith   PetscFunctionBegin;
292448a46eb9SPierre Jolivet   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
29259566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
29263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29277d68702bSBarry Smith }
29287d68702bSBarry Smith 
292917ea310bSPierre Jolivet PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
293017ea310bSPierre Jolivet {
293117ea310bSPierre Jolivet   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
293217ea310bSPierre Jolivet   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
293317ea310bSPierre Jolivet   PetscInt     m = A->rmap->N, *ailen = a->ilen;
293417ea310bSPierre Jolivet   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
293517ea310bSPierre Jolivet   MatScalar   *aa = a->a, *ap;
293617ea310bSPierre Jolivet   PetscBool    zero;
293717ea310bSPierre Jolivet 
293817ea310bSPierre Jolivet   PetscFunctionBegin;
293917ea310bSPierre Jolivet   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
294017ea310bSPierre Jolivet   if (m) rmax = ailen[0];
294117ea310bSPierre Jolivet   for (i = 1; i <= mbs; i++) {
294217ea310bSPierre Jolivet     for (k = ai[i - 1]; k < ai[i]; k++) {
294317ea310bSPierre Jolivet       zero = PETSC_TRUE;
294417ea310bSPierre Jolivet       ap   = aa + bs2 * k;
294517ea310bSPierre Jolivet       for (j = 0; j < bs2 && zero; j++) {
294617ea310bSPierre Jolivet         if (ap[j] != 0.0) zero = PETSC_FALSE;
294717ea310bSPierre Jolivet       }
294817ea310bSPierre Jolivet       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
294917ea310bSPierre Jolivet       else {
295017ea310bSPierre Jolivet         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
295117ea310bSPierre Jolivet         aj[k - fshift] = aj[k];
295217ea310bSPierre Jolivet         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
295317ea310bSPierre Jolivet       }
295417ea310bSPierre Jolivet     }
295517ea310bSPierre Jolivet     ai[i - 1] -= fshift_prev;
295617ea310bSPierre Jolivet     fshift_prev  = fshift;
295717ea310bSPierre Jolivet     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
295817ea310bSPierre Jolivet     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
295917ea310bSPierre Jolivet     rmax = PetscMax(rmax, ailen[i - 1]);
296017ea310bSPierre Jolivet   }
296117ea310bSPierre Jolivet   if (fshift) {
296217ea310bSPierre Jolivet     if (mbs) {
296317ea310bSPierre Jolivet       ai[mbs] -= fshift;
296417ea310bSPierre Jolivet       a->nz = ai[mbs];
296517ea310bSPierre Jolivet     }
296617ea310bSPierre Jolivet     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
296717ea310bSPierre Jolivet     A->nonzerostate++;
296817ea310bSPierre Jolivet     A->info.nz_unneeded += (PetscReal)fshift;
296917ea310bSPierre Jolivet     a->rmax = rmax;
297017ea310bSPierre Jolivet     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
297117ea310bSPierre Jolivet     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
297217ea310bSPierre Jolivet   }
297317ea310bSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
297417ea310bSPierre Jolivet }
297517ea310bSPierre Jolivet 
/*
  MatOps_Values - the struct _MatOps function table filled in by this file.

  The initializer is purely positional, so the order of the entries must not change. The numeric
  markers in comments give the 0-based slot number of the entry they precede. A NULL entry
  means this file provides no routine for that slot.
  NOTE(review): how a NULL slot is treated by callers is decided outside this file.
*/
2976dec0b466SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2977cc2dc46cSBarry Smith                                        MatGetRow_SeqBAIJ,
2978cc2dc46cSBarry Smith                                        MatRestoreRow_SeqBAIJ,
2979cc2dc46cSBarry Smith                                        MatMult_SeqBAIJ_N,
298097304618SKris Buschelman                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
29817c922b88SBarry Smith                                        MatMultTranspose_SeqBAIJ,
29827c922b88SBarry Smith                                        MatMultTransposeAdd_SeqBAIJ,
2983f4259b30SLisandro Dalcin                                        NULL,
2984f4259b30SLisandro Dalcin                                        NULL,
2985f4259b30SLisandro Dalcin                                        NULL,
2986f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
2987cc2dc46cSBarry Smith                                        MatLUFactor_SeqBAIJ,
2988f4259b30SLisandro Dalcin                                        NULL,
2989f4259b30SLisandro Dalcin                                        NULL,
2990f2501298SSatish Balay                                        MatTranspose_SeqBAIJ,
299197304618SKris Buschelman                                        /* 15*/ MatGetInfo_SeqBAIJ,
2992cc2dc46cSBarry Smith                                        MatEqual_SeqBAIJ,
2993cc2dc46cSBarry Smith                                        MatGetDiagonal_SeqBAIJ,
2994cc2dc46cSBarry Smith                                        MatDiagonalScale_SeqBAIJ,
2995cc2dc46cSBarry Smith                                        MatNorm_SeqBAIJ,
2996f4259b30SLisandro Dalcin                                        /* 20*/ NULL,
2997cc2dc46cSBarry Smith                                        MatAssemblyEnd_SeqBAIJ,
2998cc2dc46cSBarry Smith                                        MatSetOption_SeqBAIJ,
2999cc2dc46cSBarry Smith                                        MatZeroEntries_SeqBAIJ,
3000d519adbfSMatthew Knepley                                        /* 24*/ MatZeroRows_SeqBAIJ,
3001f4259b30SLisandro Dalcin                                        NULL,
3002f4259b30SLisandro Dalcin                                        NULL,
3003f4259b30SLisandro Dalcin                                        NULL,
3004f4259b30SLisandro Dalcin                                        NULL,
300526cec326SBarry Smith                                        /* 29*/ MatSetUp_Seq_Hash,
3006f4259b30SLisandro Dalcin                                        NULL,
3007f4259b30SLisandro Dalcin                                        NULL,
3008f4259b30SLisandro Dalcin                                        NULL,
3009f4259b30SLisandro Dalcin                                        NULL,
3010d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_SeqBAIJ,
3011f4259b30SLisandro Dalcin                                        NULL,
3012f4259b30SLisandro Dalcin                                        NULL,
3013cc2dc46cSBarry Smith                                        MatILUFactor_SeqBAIJ,
3014f4259b30SLisandro Dalcin                                        NULL,
3015d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_SeqBAIJ,
30167dae84e0SHong Zhang                                        MatCreateSubMatrices_SeqBAIJ,
3017cc2dc46cSBarry Smith                                        MatIncreaseOverlap_SeqBAIJ,
3018cc2dc46cSBarry Smith                                        MatGetValues_SeqBAIJ,
30193c896bc6SHong Zhang                                        MatCopy_SeqBAIJ,
3020f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
3021cc2dc46cSBarry Smith                                        MatScale_SeqBAIJ,
30227d68702bSBarry Smith                                        MatShift_SeqBAIJ,
3023f4259b30SLisandro Dalcin                                        NULL,
302497b48c8fSBarry Smith                                        MatZeroRowsColumns_SeqBAIJ,
3025f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
30263b2fbd54SBarry Smith                                        MatGetRowIJ_SeqBAIJ,
302792c4ed94SBarry Smith                                        MatRestoreRowIJ_SeqBAIJ,
30283acb8795SBarry Smith                                        MatGetColumnIJ_SeqBAIJ,
30293acb8795SBarry Smith                                        MatRestoreColumnIJ_SeqBAIJ,
303093dfae19SHong Zhang                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3031f4259b30SLisandro Dalcin                                        NULL,
3032f4259b30SLisandro Dalcin                                        NULL,
3033090001bdSToby Isaac                                        NULL,
3034d3825aa8SBarry Smith                                        MatSetValuesBlocked_SeqBAIJ,
30357dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3036b9b97703SBarry Smith                                        MatDestroy_SeqBAIJ,
3037b9b97703SBarry Smith                                        MatView_SeqBAIJ,
3038f4259b30SLisandro Dalcin                                        NULL,
3039f4259b30SLisandro Dalcin                                        NULL,
3040f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
3041f4259b30SLisandro Dalcin                                        NULL,
3042f4259b30SLisandro Dalcin                                        NULL,
3043f4259b30SLisandro Dalcin                                        NULL,
3044f4259b30SLisandro Dalcin                                        NULL,
3045d519adbfSMatthew Knepley                                        /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
3046f4259b30SLisandro Dalcin                                        NULL,
3047c87e5d42SMatthew Knepley                                        MatConvert_Basic,
3048f4259b30SLisandro Dalcin                                        NULL,
3049f4259b30SLisandro Dalcin                                        NULL,
3050f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
3051f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
3052f4259b30SLisandro Dalcin                                        NULL,
3053f4259b30SLisandro Dalcin                                        NULL,
3054f4259b30SLisandro Dalcin                                        NULL,
3055f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
3056f4259b30SLisandro Dalcin                                        NULL,
3057f4259b30SLisandro Dalcin                                        NULL,
3058f4259b30SLisandro Dalcin                                        NULL,
30595bba2384SShri Abhyankar                                        MatLoad_SeqBAIJ,
3060f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
3061f4259b30SLisandro Dalcin                                        NULL,
3062f4259b30SLisandro Dalcin                                        NULL,
3063f4259b30SLisandro Dalcin                                        NULL,
3064f4259b30SLisandro Dalcin                                        NULL,
3065f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
3066f4259b30SLisandro Dalcin                                        NULL,
3067f4259b30SLisandro Dalcin                                        NULL,
3068f4259b30SLisandro Dalcin                                        NULL,
3069f4259b30SLisandro Dalcin                                        NULL,
3070f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
3071f4259b30SLisandro Dalcin                                        NULL,
3072f4259b30SLisandro Dalcin                                        NULL,
3073f4259b30SLisandro Dalcin                                        NULL,
3074f4259b30SLisandro Dalcin                                        NULL,
3075f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
3076f4259b30SLisandro Dalcin                                        NULL,
3077f4259b30SLisandro Dalcin                                        NULL,
30782726fb6dSPierre Jolivet                                        MatConjugate_SeqBAIJ,
3079f4259b30SLisandro Dalcin                                        NULL,
3080f4259b30SLisandro Dalcin                                        /*104*/ NULL,
308199cafbc1SBarry Smith                                        MatRealPart_SeqBAIJ,
30822af78befSBarry Smith                                        MatImaginaryPart_SeqBAIJ,
3083f4259b30SLisandro Dalcin                                        NULL,
3084f4259b30SLisandro Dalcin                                        NULL,
3085f4259b30SLisandro Dalcin                                        /*109*/ NULL,
3086f4259b30SLisandro Dalcin                                        NULL,
3087f4259b30SLisandro Dalcin                                        NULL,
3088f4259b30SLisandro Dalcin                                        NULL,
3089547795f9SHong Zhang                                        MatMissingDiagonal_SeqBAIJ,
3090f4259b30SLisandro Dalcin                                        /*114*/ NULL,
3091f4259b30SLisandro Dalcin                                        NULL,
3092f4259b30SLisandro Dalcin                                        NULL,
3093f4259b30SLisandro Dalcin                                        NULL,
3094f4259b30SLisandro Dalcin                                        NULL,
3095f4259b30SLisandro Dalcin                                        /*119*/ NULL,
3096f4259b30SLisandro Dalcin                                        NULL,
3097547795f9SHong Zhang                                        MatMultHermitianTranspose_SeqBAIJ,
3098d6037b41SHong Zhang                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3099f4259b30SLisandro Dalcin                                        NULL,
3100f4259b30SLisandro Dalcin                                        /*124*/ NULL,
3101857cbf51SRichard Tran Mills                                        MatGetColumnReductions_SeqBAIJ,
31023964eb88SJed Brown                                        MatInvertBlockDiagonal_SeqBAIJ,
3103f4259b30SLisandro Dalcin                                        NULL,
3104f4259b30SLisandro Dalcin                                        NULL,
3105f4259b30SLisandro Dalcin                                        /*129*/ NULL,
3106f4259b30SLisandro Dalcin                                        NULL,
3107f4259b30SLisandro Dalcin                                        NULL,
3108f4259b30SLisandro Dalcin                                        NULL,
3109f4259b30SLisandro Dalcin                                        NULL,
3110f4259b30SLisandro Dalcin                                        /*134*/ NULL,
3111f4259b30SLisandro Dalcin                                        NULL,
3112f4259b30SLisandro Dalcin                                        NULL,
3113f4259b30SLisandro Dalcin                                        NULL,
3114f4259b30SLisandro Dalcin                                        NULL,
311546533700Sstefano_zampini                                        /*139*/ MatSetBlockSizes_Default,
3116f4259b30SLisandro Dalcin                                        NULL,
3117f4259b30SLisandro Dalcin                                        NULL,
3118bdf6f3fcSHong Zhang                                        MatFDColoringSetUp_SeqXAIJ,
3119f4259b30SLisandro Dalcin                                        NULL,
312086e85357SHong Zhang                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3121d70f29a3SPierre Jolivet                                        MatDestroySubMatrices_SeqBAIJ,
3122d70f29a3SPierre Jolivet                                        NULL,
312399a7f59eSMark Adams                                        NULL,
312499a7f59eSMark Adams                                        NULL,
31257fb60732SBarry Smith                                        NULL,
31267fb60732SBarry Smith                                        /*150*/ NULL,
312717ea310bSPierre Jolivet                                        MatEliminateZeros_SeqBAIJ};
31282593348eSBarry Smith 
3129ff6a9541SJacob Faibussowitsch static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3130d71ae5a4SJacob Faibussowitsch {
31313e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
31328ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
31333e90b805SBarry Smith 
31343e90b805SBarry Smith   PetscFunctionBegin;
31355f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
31363e90b805SBarry Smith 
31373e90b805SBarry Smith   /* allocate space for values if not already there */
3138ff6a9541SJacob Faibussowitsch   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
31393e90b805SBarry Smith 
31403e90b805SBarry Smith   /* copy values over */
31419566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
31423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31433e90b805SBarry Smith }
31443e90b805SBarry Smith 
3145ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3146d71ae5a4SJacob Faibussowitsch {
31473e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
31488ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
31493e90b805SBarry Smith 
31503e90b805SBarry Smith   PetscFunctionBegin;
31515f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
31525f80ce2aSJacob Faibussowitsch   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
31533e90b805SBarry Smith 
31543e90b805SBarry Smith   /* copy values over */
31559566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
31563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31573e90b805SBarry Smith }
31583e90b805SBarry Smith 
3159cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3160cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3161273d9f13SBarry Smith 
3162*f9663b93SPierre Jolivet PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3163d71ae5a4SJacob Faibussowitsch {
3164ad79cf63SBarry Smith   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3165535b19f3SBarry Smith   PetscInt     i, mbs, nbs, bs2;
31668afaa268SBarry Smith   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3167a23d5eceSKris Buschelman 
3168a23d5eceSKris Buschelman   PetscFunctionBegin;
3169ad79cf63SBarry Smith   if (B->hash_active) {
3170ad79cf63SBarry Smith     PetscInt bs;
3171aea10558SJacob Faibussowitsch     B->ops[0] = b->cops;
3172ad79cf63SBarry Smith     PetscCall(PetscHMapIJVDestroy(&b->ht));
3173ad79cf63SBarry Smith     PetscCall(MatGetBlockSize(B, &bs));
3174ad79cf63SBarry Smith     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3175ad79cf63SBarry Smith     PetscCall(PetscFree(b->dnz));
3176ad79cf63SBarry Smith     PetscCall(PetscFree(b->bdnz));
3177ad79cf63SBarry Smith     B->hash_active = PETSC_FALSE;
3178ad79cf63SBarry Smith   }
31792576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3180ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
3181ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
3182ab93d7beSBarry Smith     nz             = 0;
3183ab93d7beSBarry Smith   }
31848c07d4e3SBarry Smith 
31859566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
31869566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
31879566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
31889566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3189899cda47SBarry Smith 
3190899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
3191899cda47SBarry Smith 
3192d0f46423SBarry Smith   mbs = B->rmap->n / bs;
3193d0f46423SBarry Smith   nbs = B->cmap->n / bs;
3194a23d5eceSKris Buschelman   bs2 = bs * bs;
3195a23d5eceSKris Buschelman 
31965f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3197a23d5eceSKris Buschelman 
3198a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
31995f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3200a23d5eceSKris Buschelman   if (nnz) {
3201a23d5eceSKris Buschelman     for (i = 0; i < mbs; i++) {
32025f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
32035f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3204a23d5eceSKris Buschelman     }
3205a23d5eceSKris Buschelman   }
3206a23d5eceSKris Buschelman 
3207d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
32089566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3209d0609cedSBarry Smith   PetscOptionsEnd();
32108c07d4e3SBarry Smith 
3211a23d5eceSKris Buschelman   if (!flg) {
3212a23d5eceSKris Buschelman     switch (bs) {
3213a23d5eceSKris Buschelman     case 1:
3214a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
3215a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3216a23d5eceSKris Buschelman       break;
3217a23d5eceSKris Buschelman     case 2:
3218a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
3219a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3220a23d5eceSKris Buschelman       break;
3221a23d5eceSKris Buschelman     case 3:
3222a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
3223a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3224a23d5eceSKris Buschelman       break;
3225a23d5eceSKris Buschelman     case 4:
3226a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
3227a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3228a23d5eceSKris Buschelman       break;
3229a23d5eceSKris Buschelman     case 5:
3230a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
3231a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3232a23d5eceSKris Buschelman       break;
3233a23d5eceSKris Buschelman     case 6:
3234a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
3235a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3236a23d5eceSKris Buschelman       break;
3237a23d5eceSKris Buschelman     case 7:
3238a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
3239a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3240a23d5eceSKris Buschelman       break;
32419371c9d4SSatish Balay     case 9: {
32426679dcc1SBarry Smith       PetscInt version = 1;
32439566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32446679dcc1SBarry Smith       switch (version) {
32455f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
32466679dcc1SBarry Smith       case 1:
324796e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
324896e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
32499566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32506679dcc1SBarry Smith         break;
32516679dcc1SBarry Smith #endif
32526679dcc1SBarry Smith       default:
325396e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_N;
325496e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
32559566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
325696e086a2SDaniel Kokron         break;
32576679dcc1SBarry Smith       }
32586679dcc1SBarry Smith       break;
32596679dcc1SBarry Smith     }
3260ebada01fSBarry Smith     case 11:
3261ebada01fSBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_11;
3262ebada01fSBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3263ebada01fSBarry Smith       break;
32649371c9d4SSatish Balay     case 12: {
32656679dcc1SBarry Smith       PetscInt version = 1;
32669566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32676679dcc1SBarry Smith       switch (version) {
32686679dcc1SBarry Smith       case 1:
32696679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
32706679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
32719566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32728ab949d8SShri Abhyankar         break;
32736679dcc1SBarry Smith       case 2:
32746679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
32756679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
32769566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32776679dcc1SBarry Smith         break;
32786679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
32796679dcc1SBarry Smith       case 3:
32806679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
32816679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
32829566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32836679dcc1SBarry Smith         break;
32846679dcc1SBarry Smith #endif
3285a23d5eceSKris Buschelman       default:
3286a23d5eceSKris Buschelman         B->ops->mult    = MatMult_SeqBAIJ_N;
3287a23d5eceSKris Buschelman         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
32889566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32896679dcc1SBarry Smith         break;
32906679dcc1SBarry Smith       }
32916679dcc1SBarry Smith       break;
32926679dcc1SBarry Smith     }
32939371c9d4SSatish Balay     case 15: {
32946679dcc1SBarry Smith       PetscInt version = 1;
32959566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32966679dcc1SBarry Smith       switch (version) {
32976679dcc1SBarry Smith       case 1:
32986679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
32999566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33006679dcc1SBarry Smith         break;
33016679dcc1SBarry Smith       case 2:
33026679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
33039566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33046679dcc1SBarry Smith         break;
33056679dcc1SBarry Smith       case 3:
33066679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
33079566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33086679dcc1SBarry Smith         break;
33096679dcc1SBarry Smith       case 4:
33106679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
33119566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33126679dcc1SBarry Smith         break;
33136679dcc1SBarry Smith       default:
33146679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_N;
33159566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
33166679dcc1SBarry Smith         break;
33176679dcc1SBarry Smith       }
33186679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
33196679dcc1SBarry Smith       break;
33206679dcc1SBarry Smith     }
33216679dcc1SBarry Smith     default:
33226679dcc1SBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_N;
33236679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
33249566063dSJacob Faibussowitsch       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3325a23d5eceSKris Buschelman       break;
3326a23d5eceSKris Buschelman     }
3327a23d5eceSKris Buschelman   }
3328e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
3329a23d5eceSKris Buschelman   b->mbs      = mbs;
3330a23d5eceSKris Buschelman   b->nbs      = nbs;
3331ab93d7beSBarry Smith   if (!skipallocation) {
33322ee49352SLisandro Dalcin     if (!b->imax) {
33339566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
333426fbe8dcSKarl Rupp 
33354fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
33362ee49352SLisandro Dalcin     }
3337ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
333826fbe8dcSKarl Rupp     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3339a23d5eceSKris Buschelman     if (!nnz) {
3340a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3341c62bd62aSJed Brown       else if (nz < 0) nz = 1;
33425d2a9ed1SStefano Zampini       nz = PetscMin(nz, nbs);
3343a23d5eceSKris Buschelman       for (i = 0; i < mbs; i++) b->imax[i] = nz;
33449566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, mbs, &nz));
3345a23d5eceSKris Buschelman     } else {
3346c73702f5SBarry Smith       PetscInt64 nz64 = 0;
33479371c9d4SSatish Balay       for (i = 0; i < mbs; i++) {
33489371c9d4SSatish Balay         b->imax[i] = nnz[i];
33499371c9d4SSatish Balay         nz64 += nnz[i];
33509371c9d4SSatish Balay       }
33519566063dSJacob Faibussowitsch       PetscCall(PetscIntCast(nz64, &nz));
3352a23d5eceSKris Buschelman     }
3353a23d5eceSKris Buschelman 
3354a23d5eceSKris Buschelman     /* allocate the matrix space */
33559566063dSJacob Faibussowitsch     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3356672ba085SHong Zhang     if (B->structure_only) {
33579566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz, &b->j));
33589566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i));
3359672ba085SHong Zhang     } else {
33606679dcc1SBarry Smith       PetscInt nzbs2 = 0;
33619566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
33629566063dSJacob Faibussowitsch       PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i));
33639566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(b->a, nz * bs2));
3364672ba085SHong Zhang     }
33659566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(b->j, nz));
336626fbe8dcSKarl Rupp 
3367672ba085SHong Zhang     if (B->structure_only) {
3368672ba085SHong Zhang       b->singlemalloc = PETSC_FALSE;
3369672ba085SHong Zhang       b->free_a       = PETSC_FALSE;
3370672ba085SHong Zhang     } else {
3371a23d5eceSKris Buschelman       b->singlemalloc = PETSC_TRUE;
3372672ba085SHong Zhang       b->free_a       = PETSC_TRUE;
3373672ba085SHong Zhang     }
3374672ba085SHong Zhang     b->free_ij = PETSC_TRUE;
3375672ba085SHong Zhang 
3376a23d5eceSKris Buschelman     b->i[0] = 0;
3377ad540459SPierre Jolivet     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3378672ba085SHong Zhang 
3379e811da20SHong Zhang   } else {
3380e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3381e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3382ab93d7beSBarry Smith   }
3383a23d5eceSKris Buschelman 
3384a23d5eceSKris Buschelman   b->bs2              = bs2;
3385a23d5eceSKris Buschelman   b->mbs              = mbs;
3386a23d5eceSKris Buschelman   b->nz               = 0;
3387b32cb4a7SJed Brown   b->maxnz            = nz;
3388b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3389cb7b82ddSBarry Smith   B->was_assembled    = PETSC_FALSE;
3390cb7b82ddSBarry Smith   B->assembled        = PETSC_FALSE;
33919566063dSJacob Faibussowitsch   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
33923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3393a23d5eceSKris Buschelman }
3394a23d5eceSKris Buschelman 
339566976f2fSJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3396d71ae5a4SJacob Faibussowitsch {
3397725b52f3SLisandro Dalcin   PetscInt     i, m, nz, nz_max = 0, *nnz;
3398f4259b30SLisandro Dalcin   PetscScalar *values      = NULL;
3399d47bf9aaSJed Brown   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3400725b52f3SLisandro Dalcin 
3401725b52f3SLisandro Dalcin   PetscFunctionBegin;
34025f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
34039566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
34049566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
34059566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
34069566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
34079566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3408d0f46423SBarry Smith   m = B->rmap->n / bs;
3409725b52f3SLisandro Dalcin 
34105f80ce2aSJacob Faibussowitsch   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
34119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &nnz));
3412725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3413cf12db73SBarry Smith     nz = ii[i + 1] - ii[i];
34145f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3415725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3416725b52f3SLisandro Dalcin     nnz[i] = nz;
3417725b52f3SLisandro Dalcin   }
34189566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
34199566063dSJacob Faibussowitsch   PetscCall(PetscFree(nnz));
3420725b52f3SLisandro Dalcin 
3421725b52f3SLisandro Dalcin   values = (PetscScalar *)V;
342248a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3423725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3424cf12db73SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
3425cf12db73SBarry Smith     const PetscInt *icols = jj + ii[i];
3426bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {
3427cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
34289566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
34293adadaf3SJed Brown     } else {
34303adadaf3SJed Brown       PetscInt j;
34313adadaf3SJed Brown       for (j = 0; j < ncols; j++) {
34323adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
34339566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
34343adadaf3SJed Brown       }
34353adadaf3SJed Brown     }
3436725b52f3SLisandro Dalcin   }
34379566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
34389566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
34399566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
34409566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
34413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3442725b52f3SLisandro Dalcin }
3443725b52f3SLisandro Dalcin 
3444cda14afcSprj- /*@C
344511a5261eSBarry Smith   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3446cda14afcSprj- 
3447cda14afcSprj-   Not Collective
3448cda14afcSprj- 
3449cda14afcSprj-   Input Parameter:
3450fe59aa6dSJacob Faibussowitsch . A - a `MATSEQBAIJ` matrix
3451cda14afcSprj- 
3452cda14afcSprj-   Output Parameter:
3453cda14afcSprj- . array - pointer to the data
3454cda14afcSprj- 
3455cda14afcSprj-   Level: intermediate
3456cda14afcSprj- 
34571cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3458cda14afcSprj- @*/
3459d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array)
3460d71ae5a4SJacob Faibussowitsch {
3461cda14afcSprj-   PetscFunctionBegin;
3462cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
34633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3464cda14afcSprj- }
3465cda14afcSprj- 
3466cda14afcSprj- /*@C
346711a5261eSBarry Smith   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3468cda14afcSprj- 
3469cda14afcSprj-   Not Collective
3470cda14afcSprj- 
3471cda14afcSprj-   Input Parameters:
3472fe59aa6dSJacob Faibussowitsch + A     - a `MATSEQBAIJ` matrix
3473cda14afcSprj- - array - pointer to the data
3474cda14afcSprj- 
3475cda14afcSprj-   Level: intermediate
3476cda14afcSprj- 
34771cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3478cda14afcSprj- @*/
3479d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array)
3480d71ae5a4SJacob Faibussowitsch {
3481cda14afcSprj-   PetscFunctionBegin;
3482cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
34833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3484cda14afcSprj- }
3485cda14afcSprj- 
/*MC
   MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
   block sparse compressed row format.

   Options Database Keys:
+ -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
- -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)

   Level: beginner

   Notes:
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

   Run with `-info` to see what version of the matrix-vector product is being used

.seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
M*/
35040bad9183SKris Buschelman 
3505cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3506b24902e0SBarry Smith 
3507d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3508d71ae5a4SJacob Faibussowitsch {
3509c1ac3661SBarry Smith   PetscMPIInt  size;
3510b6490206SBarry Smith   Mat_SeqBAIJ *b;
35113b2fbd54SBarry Smith 
35123a40ed3dSBarry Smith   PetscFunctionBegin;
35139566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
35145f80ce2aSJacob Faibussowitsch   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3515b6490206SBarry Smith 
35164dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
3517b0a32e0cSBarry Smith   B->data   = (void *)b;
3518aea10558SJacob Faibussowitsch   B->ops[0] = MatOps_Values;
351926fbe8dcSKarl Rupp 
3520f4259b30SLisandro Dalcin   b->row          = NULL;
3521f4259b30SLisandro Dalcin   b->col          = NULL;
3522f4259b30SLisandro Dalcin   b->icol         = NULL;
35232593348eSBarry Smith   b->reallocs     = 0;
3524f4259b30SLisandro Dalcin   b->saved_values = NULL;
35252593348eSBarry Smith 
3526c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
35272593348eSBarry Smith   b->nonew              = 0;
3528f4259b30SLisandro Dalcin   b->diag               = NULL;
3529f4259b30SLisandro Dalcin   B->spptr              = NULL;
3530b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3531a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
35324e220ebcSLois Curfman McInnes 
35339566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
35349566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
35359566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
35369566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
35379566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
35389566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
35399566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
35409566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
35419566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
35429566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
35437ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
35449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
35457ea3e4caSstefano_zampini #endif
35469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
35479566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
35483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
35492593348eSBarry Smith }
35502593348eSBarry Smith 
3551d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3552d71ae5a4SJacob Faibussowitsch {
3553b24902e0SBarry Smith   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3554a96a251dSBarry Smith   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3555de6a44a3SBarry Smith 
35563a40ed3dSBarry Smith   PetscFunctionBegin;
355731fe6a7dSBarry Smith   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
35585f80ce2aSJacob Faibussowitsch   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
35592593348eSBarry Smith 
35604fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35614fd072dbSBarry Smith     c->imax           = a->imax;
35624fd072dbSBarry Smith     c->ilen           = a->ilen;
35634fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
35644fd072dbSBarry Smith   } else {
35659566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3566b6490206SBarry Smith     for (i = 0; i < mbs; i++) {
35672593348eSBarry Smith       c->imax[i] = a->imax[i];
35682593348eSBarry Smith       c->ilen[i] = a->ilen[i];
35692593348eSBarry Smith     }
35704fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
35714fd072dbSBarry Smith   }
35722593348eSBarry Smith 
35732593348eSBarry Smith   /* allocate the matrix space */
357416a2bf60SHong Zhang   if (mallocmatspace) {
35754fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35769566063dSJacob Faibussowitsch       PetscCall(PetscCalloc1(bs2 * nz, &c->a));
357726fbe8dcSKarl Rupp 
35784fd072dbSBarry Smith       c->i            = a->i;
35794fd072dbSBarry Smith       c->j            = a->j;
3580379be0ddSLisandro Dalcin       c->singlemalloc = PETSC_FALSE;
3581379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
3582379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
35834fd072dbSBarry Smith       c->parent       = A;
35841e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
35851e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
358626fbe8dcSKarl Rupp 
35879566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)A));
35889566063dSJacob Faibussowitsch       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35899566063dSJacob Faibussowitsch       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35904fd072dbSBarry Smith     } else {
35919566063dSJacob Faibussowitsch       PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));
359226fbe8dcSKarl Rupp 
3593c4992f7dSBarry Smith       c->singlemalloc = PETSC_TRUE;
3594379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
35954fd072dbSBarry Smith       c->free_ij      = PETSC_TRUE;
359626fbe8dcSKarl Rupp 
35979566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3598b6490206SBarry Smith       if (mbs > 0) {
35999566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(c->j, a->j, nz));
36002e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
36019566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
36022e8a6d31SBarry Smith         } else {
36039566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(c->a, bs2 * nz));
36042593348eSBarry Smith         }
36052593348eSBarry Smith       }
36061e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
36071e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
360816a2bf60SHong Zhang     }
36094fd072dbSBarry Smith   }
361016a2bf60SHong Zhang 
36112593348eSBarry Smith   c->roworiented = a->roworiented;
36122593348eSBarry Smith   c->nonew       = a->nonew;
361326fbe8dcSKarl Rupp 
36149566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
36159566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
361626fbe8dcSKarl Rupp 
36175c9eb25fSBarry Smith   c->bs2 = a->bs2;
36185c9eb25fSBarry Smith   c->mbs = a->mbs;
36195c9eb25fSBarry Smith   c->nbs = a->nbs;
36202593348eSBarry Smith 
36212593348eSBarry Smith   if (a->diag) {
36224fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
36234fd072dbSBarry Smith       c->diag      = a->diag;
36244fd072dbSBarry Smith       c->free_diag = PETSC_FALSE;
36254fd072dbSBarry Smith     } else {
36269566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
362726fbe8dcSKarl Rupp       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
36284fd072dbSBarry Smith       c->free_diag = PETSC_TRUE;
36294fd072dbSBarry Smith     }
3630f4259b30SLisandro Dalcin   } else c->diag = NULL;
363126fbe8dcSKarl Rupp 
36322593348eSBarry Smith   c->nz         = a->nz;
3633f2cbd3d5SJed Brown   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3634f361c04dSBarry Smith   c->solve_work = NULL;
3635f361c04dSBarry Smith   c->mult_work  = NULL;
3636f361c04dSBarry Smith   c->sor_workt  = NULL;
3637f361c04dSBarry Smith   c->sor_work   = NULL;
363888e51ccdSHong Zhang 
363988e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
364088e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3641cd6b891eSBarry Smith   if (a->compressedrow.use) {
364288e51ccdSHong Zhang     i = a->compressedrow.nrows;
36439566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
36449566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
36459566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
364688e51ccdSHong Zhang   } else {
364788e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
36480298fd71SBarry Smith     c->compressedrow.i      = NULL;
36490298fd71SBarry Smith     c->compressedrow.rindex = NULL;
365088e51ccdSHong Zhang   }
3651c05f355bSMark Adams   c->nonzerorowcnt = a->nonzerorowcnt;
3652e56f5c9eSBarry Smith   C->nonzerostate  = A->nonzerostate;
365326fbe8dcSKarl Rupp 
36549566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
36553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
36562593348eSBarry Smith }
36572593348eSBarry Smith 
3658d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3659d71ae5a4SJacob Faibussowitsch {
3660b24902e0SBarry Smith   PetscFunctionBegin;
36619566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
36629566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
36639566063dSJacob Faibussowitsch   PetscCall(MatSetType(*B, MATSEQBAIJ));
36649566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
36653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3666b24902e0SBarry Smith }
3667b24902e0SBarry Smith 
3668618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3669d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3670d71ae5a4SJacob Faibussowitsch {
3671b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3672b51a4376SLisandro Dalcin   PetscInt    *rowidxs, *colidxs;
3673b51a4376SLisandro Dalcin   PetscScalar *matvals;
3674b51a4376SLisandro Dalcin 
3675b51a4376SLisandro Dalcin   PetscFunctionBegin;
36769566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
3677b51a4376SLisandro Dalcin 
3678b51a4376SLisandro Dalcin   /* read matrix header */
36799566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
36805f80ce2aSJacob Faibussowitsch   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
36819371c9d4SSatish Balay   M  = header[1];
36829371c9d4SSatish Balay   N  = header[2];
36839371c9d4SSatish Balay   nz = header[3];
36845f80ce2aSJacob Faibussowitsch   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
36855f80ce2aSJacob Faibussowitsch   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
36865f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3687b51a4376SLisandro Dalcin 
3688b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
36899566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3690b51a4376SLisandro Dalcin   /* set local and global sizes if not set already */
3691b51a4376SLisandro Dalcin   if (mat->rmap->n < 0) mat->rmap->n = M;
3692b51a4376SLisandro Dalcin   if (mat->cmap->n < 0) mat->cmap->n = N;
3693b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3694b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
36959566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
36969566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
3697b51a4376SLisandro Dalcin 
3698b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
36999566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &rows, &cols));
37005f80ce2aSJacob Faibussowitsch   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
37019566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
37029566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
37039371c9d4SSatish Balay   mbs = m / bs;
37049371c9d4SSatish Balay   nbs = n / bs;
3705b51a4376SLisandro Dalcin 
3706b51a4376SLisandro Dalcin   /* read in row lengths, column indices and nonzero values */
37079566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &rowidxs));
37089566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
37099371c9d4SSatish Balay   rowidxs[0] = 0;
37109371c9d4SSatish Balay   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3711b51a4376SLisandro Dalcin   sum = rowidxs[m];
37125f80ce2aSJacob Faibussowitsch   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3713b51a4376SLisandro Dalcin 
3714b51a4376SLisandro Dalcin   /* read in column indices and nonzero values */
37159566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
37169566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
37179566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3718b51a4376SLisandro Dalcin 
3719b51a4376SLisandro Dalcin   {               /* preallocate matrix storage */
3720b51a4376SLisandro Dalcin     PetscBT   bt; /* helper bit set to count nonzeros */
3721b51a4376SLisandro Dalcin     PetscInt *nnz;
3722618cc2edSLisandro Dalcin     PetscBool sbaij;
3723b51a4376SLisandro Dalcin 
37249566063dSJacob Faibussowitsch     PetscCall(PetscBTCreate(nbs, &bt));
37259566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(mbs, &nnz));
37269566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3727b51a4376SLisandro Dalcin     for (i = 0; i < mbs; i++) {
37289566063dSJacob Faibussowitsch       PetscCall(PetscBTMemzero(nbs, bt));
3729618cc2edSLisandro Dalcin       for (k = 0; k < bs; k++) {
3730618cc2edSLisandro Dalcin         PetscInt row = bs * i + k;
3731618cc2edSLisandro Dalcin         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3732618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3733618cc2edSLisandro Dalcin           if (!sbaij || col >= row)
3734618cc2edSLisandro Dalcin             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3735618cc2edSLisandro Dalcin         }
3736618cc2edSLisandro Dalcin       }
3737b51a4376SLisandro Dalcin     }
37389566063dSJacob Faibussowitsch     PetscCall(PetscBTDestroy(&bt));
37399566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
37409566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
37419566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
3742b51a4376SLisandro Dalcin   }
3743b51a4376SLisandro Dalcin 
3744b51a4376SLisandro Dalcin   /* store matrix values */
3745b51a4376SLisandro Dalcin   for (i = 0; i < m; i++) {
3746b51a4376SLisandro Dalcin     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
37479566063dSJacob Faibussowitsch     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3748b51a4376SLisandro Dalcin   }
3749b51a4376SLisandro Dalcin 
37509566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
37519566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs, matvals));
37529566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
37539566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
37543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3755b51a4376SLisandro Dalcin }
3756b51a4376SLisandro Dalcin 
3757d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3758d71ae5a4SJacob Faibussowitsch {
37597f489da9SVaclav Hapla   PetscBool isbinary;
3760f501eaabSShri Abhyankar 
3761f501eaabSShri Abhyankar   PetscFunctionBegin;
37629566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
37635f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
37649566063dSJacob Faibussowitsch   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
37653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3766f501eaabSShri Abhyankar }
3767f501eaabSShri Abhyankar 
3768273d9f13SBarry Smith /*@C
376911a5261eSBarry Smith   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3770273d9f13SBarry Smith   compressed row) format.  For good matrix assembly performance the
377120f4b53cSBarry Smith   user should preallocate the matrix storage by setting the parameter `nz`
377220f4b53cSBarry Smith   (or the array `nnz`).
37732593348eSBarry Smith 
3774d083f849SBarry Smith   Collective
3775273d9f13SBarry Smith 
3776273d9f13SBarry Smith   Input Parameters:
377711a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
377811a5261eSBarry Smith . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
377911a5261eSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3780273d9f13SBarry Smith . m    - number of rows
3781273d9f13SBarry Smith . n    - number of columns
378235d8aa7fSBarry Smith . nz   - number of nonzero blocks  per block row (same for all rows)
378335d8aa7fSBarry Smith - nnz  - array containing the number of nonzero blocks in the various block rows
378420f4b53cSBarry Smith          (possibly different for each block row) or `NULL`
3785273d9f13SBarry Smith 
3786273d9f13SBarry Smith   Output Parameter:
3787273d9f13SBarry Smith . A - the matrix
3788273d9f13SBarry Smith 
3789273d9f13SBarry Smith   Options Database Keys:
379011a5261eSBarry Smith + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3791a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
3792273d9f13SBarry Smith 
3793273d9f13SBarry Smith   Level: intermediate
3794273d9f13SBarry Smith 
3795273d9f13SBarry Smith   Notes:
379677433607SBarry Smith   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
37972ef1f0ffSBarry Smith   MatXXXXSetPreallocation() paradigm instead of this routine directly.
37982ef1f0ffSBarry Smith   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
37992ef1f0ffSBarry Smith 
3800d1be2dadSMatthew Knepley   The number of rows and columns must be divisible by blocksize.
3801d1be2dadSMatthew Knepley 
38022ef1f0ffSBarry Smith   If the `nnz` parameter is given then the `nz` parameter is ignored
380349a6f317SBarry Smith 
380435d8aa7fSBarry Smith   A nonzero block is any block that as 1 or more nonzeros in it
380535d8aa7fSBarry Smith 
38062ef1f0ffSBarry Smith   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3807273d9f13SBarry Smith   storage.  That is, the stored row and column indices can begin at
380820f4b53cSBarry Smith   either one (as in Fortran) or zero.
3809273d9f13SBarry Smith 
38102ef1f0ffSBarry Smith   Specify the preallocated storage with either `nz` or `nnz` (not both).
38112ef1f0ffSBarry Smith   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3812651615e1SBarry Smith   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3813273d9f13SBarry Smith   matrices.
3814273d9f13SBarry Smith 
38151cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3816273d9f13SBarry Smith @*/
3817d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3818d71ae5a4SJacob Faibussowitsch {
3819273d9f13SBarry Smith   PetscFunctionBegin;
38209566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
38219566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
38229566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJ));
38239566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
38243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3825273d9f13SBarry Smith }
3826273d9f13SBarry Smith 
3827273d9f13SBarry Smith /*@C
3828273d9f13SBarry Smith   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3829273d9f13SBarry Smith   per row in the matrix. For good matrix assembly performance the
383020f4b53cSBarry Smith   user should preallocate the matrix storage by setting the parameter `nz`
383120f4b53cSBarry Smith   (or the array `nnz`).
3832273d9f13SBarry Smith 
3833d083f849SBarry Smith   Collective
3834273d9f13SBarry Smith 
3835273d9f13SBarry Smith   Input Parameters:
38361c4f3114SJed Brown + B   - the matrix
383711a5261eSBarry Smith . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
383811a5261eSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3839273d9f13SBarry Smith . nz  - number of block nonzeros per block row (same for all rows)
3840273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows
38412ef1f0ffSBarry Smith          (possibly different for each block row) or `NULL`
3842273d9f13SBarry Smith 
3843273d9f13SBarry Smith   Options Database Keys:
384411a5261eSBarry Smith + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3845a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
3846273d9f13SBarry Smith 
3847273d9f13SBarry Smith   Level: intermediate
3848273d9f13SBarry Smith 
3849273d9f13SBarry Smith   Notes:
38502ef1f0ffSBarry Smith   If the `nnz` parameter is given then the `nz` parameter is ignored
385149a6f317SBarry Smith 
385211a5261eSBarry Smith   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3853aa95bbe8SBarry Smith   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
385420f4b53cSBarry Smith   You can also run with the option `-info` and look for messages with the string
3855aa95bbe8SBarry Smith   malloc in them to see if additional memory allocation was needed.
3856aa95bbe8SBarry Smith 
38572ef1f0ffSBarry Smith   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3858273d9f13SBarry Smith   storage.  That is, the stored row and column indices can begin at
385920f4b53cSBarry Smith   either one (as in Fortran) or zero.
3860273d9f13SBarry Smith 
3861273d9f13SBarry Smith   Specify the preallocated storage with either nz or nnz (not both).
38622ef1f0ffSBarry Smith   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3863651615e1SBarry Smith   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3864273d9f13SBarry Smith 
38651cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3866273d9f13SBarry Smith @*/
3867d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3868d71ae5a4SJacob Faibussowitsch {
3869273d9f13SBarry Smith   PetscFunctionBegin;
38706ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
38716ba663aaSJed Brown   PetscValidType(B, 1);
38726ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3873cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
38743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3875273d9f13SBarry Smith }
3876a1d92eedSBarry Smith 
3877725b52f3SLisandro Dalcin /*@C
387811a5261eSBarry Smith   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3879725b52f3SLisandro Dalcin 
3880d083f849SBarry Smith   Collective
3881725b52f3SLisandro Dalcin 
3882725b52f3SLisandro Dalcin   Input Parameters:
38831c4f3114SJed Brown + B  - the matrix
388420f4b53cSBarry Smith . bs - the blocksize
38852ef1f0ffSBarry Smith . i  - the indices into `j` for the start of each local row (starts with zero)
3886725b52f3SLisandro Dalcin . j  - the column indices for each local row (starts with zero) these must be sorted for each row
3887725b52f3SLisandro Dalcin - v  - optional values in the matrix
3888725b52f3SLisandro Dalcin 
3889664954b6SBarry Smith   Level: advanced
3890725b52f3SLisandro Dalcin 
38913adadaf3SJed Brown   Notes:
389211a5261eSBarry Smith   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
389311a5261eSBarry Smith   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
38943adadaf3SJed Brown   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
389511a5261eSBarry Smith   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
38963adadaf3SJed Brown   block column and the second index is over columns within a block.
38973adadaf3SJed Brown 
3898664954b6SBarry Smith   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3899664954b6SBarry Smith 
39001cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3901725b52f3SLisandro Dalcin @*/
3902d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3903d71ae5a4SJacob Faibussowitsch {
3904725b52f3SLisandro Dalcin   PetscFunctionBegin;
39056ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
39066ba663aaSJed Brown   PetscValidType(B, 1);
39076ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3908cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
39093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3910725b52f3SLisandro Dalcin }
3911725b52f3SLisandro Dalcin 
3912c75a6043SHong Zhang /*@
391311a5261eSBarry Smith   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3914c75a6043SHong Zhang 
3915d083f849SBarry Smith   Collective
3916c75a6043SHong Zhang 
3917c75a6043SHong Zhang   Input Parameters:
3918c75a6043SHong Zhang + comm - must be an MPI communicator of size 1
3919c75a6043SHong Zhang . bs   - size of block
3920c75a6043SHong Zhang . m    - number of rows
3921c75a6043SHong Zhang . n    - number of columns
3922483a2f95SBarry Smith . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3923c75a6043SHong Zhang . j    - column indices
3924c75a6043SHong Zhang - a    - matrix values
3925c75a6043SHong Zhang 
3926c75a6043SHong Zhang   Output Parameter:
3927c75a6043SHong Zhang . mat - the matrix
3928c75a6043SHong Zhang 
3929dfb205c3SBarry Smith   Level: advanced
3930c75a6043SHong Zhang 
3931c75a6043SHong Zhang   Notes:
39322ef1f0ffSBarry Smith   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3933c75a6043SHong Zhang   once the matrix is destroyed
3934c75a6043SHong Zhang 
3935c75a6043SHong Zhang   You cannot set new nonzero locations into this matrix, that will generate an error.
3936c75a6043SHong Zhang 
39372ef1f0ffSBarry Smith   The `i` and `j` indices are 0 based
3938c75a6043SHong Zhang 
393911a5261eSBarry Smith   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3940dfb205c3SBarry Smith 
39413adadaf3SJed Brown   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
39423adadaf3SJed Brown   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
39433adadaf3SJed Brown   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
39443adadaf3SJed Brown   with column-major ordering within blocks.
3945dfb205c3SBarry Smith 
39461cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3947c75a6043SHong Zhang @*/
3948d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3949d71ae5a4SJacob Faibussowitsch {
3950c75a6043SHong Zhang   Mat_SeqBAIJ *baij;
3951c75a6043SHong Zhang 
3952c75a6043SHong Zhang   PetscFunctionBegin;
39535f80ce2aSJacob Faibussowitsch   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
39545f80ce2aSJacob Faibussowitsch   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3955c75a6043SHong Zhang 
39569566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
39579566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, m, n));
39589566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATSEQBAIJ));
39599566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3960c75a6043SHong Zhang   baij = (Mat_SeqBAIJ *)(*mat)->data;
39619566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3962c75a6043SHong Zhang 
3963c75a6043SHong Zhang   baij->i = i;
3964c75a6043SHong Zhang   baij->j = j;
3965c75a6043SHong Zhang   baij->a = a;
396626fbe8dcSKarl Rupp 
3967c75a6043SHong Zhang   baij->singlemalloc   = PETSC_FALSE;
3968c75a6043SHong Zhang   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3969e6b907acSBarry Smith   baij->free_a         = PETSC_FALSE;
3970e6b907acSBarry Smith   baij->free_ij        = PETSC_FALSE;
3971ceb5bf51SJacob Faibussowitsch   baij->free_imax_ilen = PETSC_TRUE;
3972c75a6043SHong Zhang 
3973ceb5bf51SJacob Faibussowitsch   for (PetscInt ii = 0; ii < m; ii++) {
3974ceb5bf51SJacob Faibussowitsch     const PetscInt row_len = i[ii + 1] - i[ii];
3975ceb5bf51SJacob Faibussowitsch 
3976ceb5bf51SJacob Faibussowitsch     baij->ilen[ii] = baij->imax[ii] = row_len;
3977ceb5bf51SJacob Faibussowitsch     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3978c75a6043SHong Zhang   }
397976bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
3980ceb5bf51SJacob Faibussowitsch     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
39816bdcaf15SBarry Smith       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
39826bdcaf15SBarry Smith       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3983c75a6043SHong Zhang     }
398476bd3646SJed Brown   }
3985c75a6043SHong Zhang 
39869566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
39879566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
39883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3989c75a6043SHong Zhang }
3990bdf6f3fcSHong Zhang 
3991d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3992d71ae5a4SJacob Faibussowitsch {
3993bdf6f3fcSHong Zhang   PetscFunctionBegin;
39949566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
39953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3996bdf6f3fcSHong Zhang }
3997