xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 6497c311e7b976d467be1503c1effce92a60525c)
/*
    Defines the basic matrix operations for the BAIJ (block compressed row)
  matrix storage format.
*/
5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
6c6db04a5SJed Brown #include <petscblaslapack.h>
7af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
943516a2dSKris Buschelman 
1026cec326SBarry Smith /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
1126cec326SBarry Smith #define TYPE BAIJ
1226cec326SBarry Smith #define TYPE_BS
1326cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
1426cec326SBarry Smith #undef TYPE_BS
1526cec326SBarry Smith #define TYPE_BS _BS
1626cec326SBarry Smith #define TYPE_BS_ON
1726cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
1826cec326SBarry Smith #undef TYPE_BS
1926cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmat.h"
2026cec326SBarry Smith #undef TYPE
2126cec326SBarry Smith #undef TYPE_BS_ON
2226cec326SBarry Smith 
237ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
247ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
257ea3e4caSstefano_zampini #endif
267ea3e4caSstefano_zampini 
27b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29b5b72c8aSIrina Sokolova #endif
30c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
31b5b72c8aSIrina Sokolova 
32ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
33d71ae5a4SJacob Faibussowitsch {
349463ebdaSPierre Jolivet   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
35ff6a9541SJacob Faibussowitsch   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
369463ebdaSPierre Jolivet   MatScalar   *a_val = a_aij->a;
379463ebdaSPierre Jolivet 
389463ebdaSPierre Jolivet   PetscFunctionBegin;
399566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
40ff6a9541SJacob Faibussowitsch   PetscCall(PetscArrayzero(reductions, n));
419463ebdaSPierre Jolivet   if (type == NORM_2) {
42ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
439463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
449463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
45857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
469463ebdaSPierre Jolivet           a_val++;
479463ebdaSPierre Jolivet         }
489463ebdaSPierre Jolivet       }
499463ebdaSPierre Jolivet     }
509463ebdaSPierre Jolivet   } else if (type == NORM_1) {
51ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
529463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
539463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
54857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
559463ebdaSPierre Jolivet           a_val++;
569463ebdaSPierre Jolivet         }
579463ebdaSPierre Jolivet       }
589463ebdaSPierre Jolivet     }
599463ebdaSPierre Jolivet   } else if (type == NORM_INFINITY) {
60ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
619463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
629463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
63*6497c311SBarry Smith           PetscInt col    = A->cmap->rstart + a_aij->j[i] * bs + jb;
64857cbf51SRichard Tran Mills           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
659463ebdaSPierre Jolivet           a_val++;
669463ebdaSPierre Jolivet         }
679463ebdaSPierre Jolivet       }
689463ebdaSPierre Jolivet     }
69857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
70ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
71857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
72857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
73857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
74857cbf51SRichard Tran Mills           a_val++;
75857cbf51SRichard Tran Mills         }
76857cbf51SRichard Tran Mills       }
77857cbf51SRichard Tran Mills     }
78857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
80857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
81857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
82857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
83857cbf51SRichard Tran Mills           a_val++;
84857cbf51SRichard Tran Mills         }
85857cbf51SRichard Tran Mills       }
86857cbf51SRichard Tran Mills     }
87857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
889463ebdaSPierre Jolivet   if (type == NORM_2) {
89ff6a9541SJacob Faibussowitsch     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
90857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
91ff6a9541SJacob Faibussowitsch     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
929463ebdaSPierre Jolivet   }
933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
949463ebdaSPierre Jolivet }
959463ebdaSPierre Jolivet 
9666976f2fSJacob Faibussowitsch static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
97d71ae5a4SJacob Faibussowitsch {
98b01c7715SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
99de80f912SBarry Smith   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
1007f0c90edSBarry Smith   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
10162bba022SBarry Smith   PetscReal    shift = 0.0;
1021a9391e3SHong Zhang   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
103b01c7715SBarry Smith 
104b01c7715SBarry Smith   PetscFunctionBegin;
105a455e926SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
106a455e926SHong Zhang 
1079797317bSBarry Smith   if (a->idiagvalid) {
1089797317bSBarry Smith     if (values) *values = a->idiag;
1093ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1109797317bSBarry Smith   }
1119566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
112b01c7715SBarry Smith   diag_offset = a->diag;
1134dfa11a4SJacob Faibussowitsch   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
114b01c7715SBarry Smith   diag = a->idiag;
115bbead8a2SBarry Smith   if (values) *values = a->idiag;
116b01c7715SBarry Smith   /* factor and invert each block */
117521d7252SBarry Smith   switch (bs) {
118ab040260SJed Brown   case 1:
119ab040260SJed Brown     for (i = 0; i < mbs; i++) {
120ab040260SJed Brown       odiag   = v + 1 * diag_offset[i];
121ab040260SJed Brown       diag[0] = odiag[0];
122ec1892c8SHong Zhang 
123ec1892c8SHong Zhang       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
124ec1892c8SHong Zhang         if (allowzeropivot) {
1257b6c816cSBarry Smith           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1267b6c816cSBarry Smith           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
1277b6c816cSBarry Smith           A->factorerror_zeropivot_row   = i;
1289566063dSJacob Faibussowitsch           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
12998921bdaSJacob Faibussowitsch         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
130ec1892c8SHong Zhang       }
131ec1892c8SHong Zhang 
132d4a378daSJed Brown       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
133ab040260SJed Brown       diag += 1;
134ab040260SJed Brown     }
135ab040260SJed Brown     break;
136b01c7715SBarry Smith   case 2:
137b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
138b01c7715SBarry Smith       odiag   = v + 4 * diag_offset[i];
1399371c9d4SSatish Balay       diag[0] = odiag[0];
1409371c9d4SSatish Balay       diag[1] = odiag[1];
1419371c9d4SSatish Balay       diag[2] = odiag[2];
1429371c9d4SSatish Balay       diag[3] = odiag[3];
1439566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1447b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
145b01c7715SBarry Smith       diag += 4;
146b01c7715SBarry Smith     }
147b01c7715SBarry Smith     break;
148b01c7715SBarry Smith   case 3:
149b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
150b01c7715SBarry Smith       odiag   = v + 9 * diag_offset[i];
1519371c9d4SSatish Balay       diag[0] = odiag[0];
1529371c9d4SSatish Balay       diag[1] = odiag[1];
1539371c9d4SSatish Balay       diag[2] = odiag[2];
1549371c9d4SSatish Balay       diag[3] = odiag[3];
1559371c9d4SSatish Balay       diag[4] = odiag[4];
1569371c9d4SSatish Balay       diag[5] = odiag[5];
1579371c9d4SSatish Balay       diag[6] = odiag[6];
1589371c9d4SSatish Balay       diag[7] = odiag[7];
159b01c7715SBarry Smith       diag[8] = odiag[8];
1609566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
1617b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
162b01c7715SBarry Smith       diag += 9;
163b01c7715SBarry Smith     }
164b01c7715SBarry Smith     break;
165b01c7715SBarry Smith   case 4:
166b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
167b01c7715SBarry Smith       odiag = v + 16 * diag_offset[i];
1689566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 16));
1699566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
1707b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
171b01c7715SBarry Smith       diag += 16;
172b01c7715SBarry Smith     }
173b01c7715SBarry Smith     break;
174b01c7715SBarry Smith   case 5:
175b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
176b01c7715SBarry Smith       odiag = v + 25 * diag_offset[i];
1779566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 25));
1789566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
1797b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
180b01c7715SBarry Smith       diag += 25;
181b01c7715SBarry Smith     }
182b01c7715SBarry Smith     break;
183d49b2adcSBarry Smith   case 6:
184d49b2adcSBarry Smith     for (i = 0; i < mbs; i++) {
185d49b2adcSBarry Smith       odiag = v + 36 * diag_offset[i];
1869566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 36));
1879566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
1887b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
189d49b2adcSBarry Smith       diag += 36;
190d49b2adcSBarry Smith     }
191d49b2adcSBarry Smith     break;
192de80f912SBarry Smith   case 7:
193de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
194de80f912SBarry Smith       odiag = v + 49 * diag_offset[i];
1959566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 49));
1969566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
1977b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
198de80f912SBarry Smith       diag += 49;
199de80f912SBarry Smith     }
200de80f912SBarry Smith     break;
201b01c7715SBarry Smith   default:
2029566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
203de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
204de80f912SBarry Smith       odiag = v + bs2 * diag_offset[i];
2059566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, bs2));
2069566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
2077b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
208de80f912SBarry Smith       diag += bs2;
209de80f912SBarry Smith     }
2109566063dSJacob Faibussowitsch     PetscCall(PetscFree2(v_work, v_pivots));
211b01c7715SBarry Smith   }
212b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
2133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
214b01c7715SBarry Smith }
215b01c7715SBarry Smith 
21666976f2fSJacob Faibussowitsch static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
217d71ae5a4SJacob Faibussowitsch {
2186d3beeddSMatthew Knepley   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
219e48d15efSToby Isaac   PetscScalar       *x, *work, *w, *workt, *t;
220e48d15efSToby Isaac   const MatScalar   *v, *aa = a->a, *idiag;
221e48d15efSToby Isaac   const PetscScalar *b, *xb;
2225455b99fSToby Isaac   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
223e48d15efSToby Isaac   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
224c1ac3661SBarry Smith   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
225b01c7715SBarry Smith 
226b01c7715SBarry Smith   PetscFunctionBegin;
227b01c7715SBarry Smith   its = its * lits;
2285f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
2295f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2305f80ce2aSJacob Faibussowitsch   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
2315f80ce2aSJacob Faibussowitsch   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
2325f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
233b01c7715SBarry Smith 
2349566063dSJacob Faibussowitsch   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
235b01c7715SBarry Smith 
2363ba16761SJacob Faibussowitsch   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
237b01c7715SBarry Smith   diag  = a->diag;
238b01c7715SBarry Smith   idiag = a->idiag;
239de80f912SBarry Smith   k     = PetscMax(A->rmap->n, A->cmap->n);
24048a46eb9SPierre Jolivet   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
24148a46eb9SPierre Jolivet   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
24248a46eb9SPierre Jolivet   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
2433475c22fSBarry Smith   work = a->mult_work;
2443475c22fSBarry Smith   t    = a->sor_workt;
245de80f912SBarry Smith   w    = a->sor_work;
246de80f912SBarry Smith 
2479566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
2489566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
249de80f912SBarry Smith 
250de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
251de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
252e48d15efSToby Isaac       switch (bs) {
253e48d15efSToby Isaac       case 1:
254e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
255e48d15efSToby Isaac         t[0] = b[0];
256e48d15efSToby Isaac         i2   = 1;
257e48d15efSToby Isaac         idiag += 1;
258e48d15efSToby Isaac         for (i = 1; i < m; i++) {
259e48d15efSToby Isaac           v    = aa + ai[i];
260e48d15efSToby Isaac           vi   = aj + ai[i];
261e48d15efSToby Isaac           nz   = diag[i] - ai[i];
262e48d15efSToby Isaac           s[0] = b[i2];
263e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
264e48d15efSToby Isaac             xw[0] = x[vi[j]];
265e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
266e48d15efSToby Isaac           }
267e48d15efSToby Isaac           t[i2] = s[0];
268e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
269e48d15efSToby Isaac           x[i2] = xw[0];
270e48d15efSToby Isaac           idiag += 1;
271e48d15efSToby Isaac           i2 += 1;
272e48d15efSToby Isaac         }
273e48d15efSToby Isaac         break;
274e48d15efSToby Isaac       case 2:
275e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
2769371c9d4SSatish Balay         t[0] = b[0];
2779371c9d4SSatish Balay         t[1] = b[1];
278e48d15efSToby Isaac         i2   = 2;
279e48d15efSToby Isaac         idiag += 4;
280e48d15efSToby Isaac         for (i = 1; i < m; i++) {
281e48d15efSToby Isaac           v    = aa + 4 * ai[i];
282e48d15efSToby Isaac           vi   = aj + ai[i];
283e48d15efSToby Isaac           nz   = diag[i] - ai[i];
2849371c9d4SSatish Balay           s[0] = b[i2];
2859371c9d4SSatish Balay           s[1] = b[i2 + 1];
286e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
287e48d15efSToby Isaac             idx   = 2 * vi[j];
288e48d15efSToby Isaac             it    = 4 * j;
2899371c9d4SSatish Balay             xw[0] = x[idx];
2909371c9d4SSatish Balay             xw[1] = x[1 + idx];
291e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
292e48d15efSToby Isaac           }
2939371c9d4SSatish Balay           t[i2]     = s[0];
2949371c9d4SSatish Balay           t[i2 + 1] = s[1];
295e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
2969371c9d4SSatish Balay           x[i2]     = xw[0];
2979371c9d4SSatish Balay           x[i2 + 1] = xw[1];
298e48d15efSToby Isaac           idiag += 4;
299e48d15efSToby Isaac           i2 += 2;
300e48d15efSToby Isaac         }
301e48d15efSToby Isaac         break;
302e48d15efSToby Isaac       case 3:
303e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
3049371c9d4SSatish Balay         t[0] = b[0];
3059371c9d4SSatish Balay         t[1] = b[1];
3069371c9d4SSatish Balay         t[2] = b[2];
307e48d15efSToby Isaac         i2   = 3;
308e48d15efSToby Isaac         idiag += 9;
309e48d15efSToby Isaac         for (i = 1; i < m; i++) {
310e48d15efSToby Isaac           v    = aa + 9 * ai[i];
311e48d15efSToby Isaac           vi   = aj + ai[i];
312e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3139371c9d4SSatish Balay           s[0] = b[i2];
3149371c9d4SSatish Balay           s[1] = b[i2 + 1];
3159371c9d4SSatish Balay           s[2] = b[i2 + 2];
316e48d15efSToby Isaac           while (nz--) {
317e48d15efSToby Isaac             idx   = 3 * (*vi++);
3189371c9d4SSatish Balay             xw[0] = x[idx];
3199371c9d4SSatish Balay             xw[1] = x[1 + idx];
3209371c9d4SSatish Balay             xw[2] = x[2 + idx];
321e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
322e48d15efSToby Isaac             v += 9;
323e48d15efSToby Isaac           }
3249371c9d4SSatish Balay           t[i2]     = s[0];
3259371c9d4SSatish Balay           t[i2 + 1] = s[1];
3269371c9d4SSatish Balay           t[i2 + 2] = s[2];
327e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
3289371c9d4SSatish Balay           x[i2]     = xw[0];
3299371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3309371c9d4SSatish Balay           x[i2 + 2] = xw[2];
331e48d15efSToby Isaac           idiag += 9;
332e48d15efSToby Isaac           i2 += 3;
333e48d15efSToby Isaac         }
334e48d15efSToby Isaac         break;
335e48d15efSToby Isaac       case 4:
336e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
3379371c9d4SSatish Balay         t[0] = b[0];
3389371c9d4SSatish Balay         t[1] = b[1];
3399371c9d4SSatish Balay         t[2] = b[2];
3409371c9d4SSatish Balay         t[3] = b[3];
341e48d15efSToby Isaac         i2   = 4;
342e48d15efSToby Isaac         idiag += 16;
343e48d15efSToby Isaac         for (i = 1; i < m; i++) {
344e48d15efSToby Isaac           v    = aa + 16 * ai[i];
345e48d15efSToby Isaac           vi   = aj + ai[i];
346e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3479371c9d4SSatish Balay           s[0] = b[i2];
3489371c9d4SSatish Balay           s[1] = b[i2 + 1];
3499371c9d4SSatish Balay           s[2] = b[i2 + 2];
3509371c9d4SSatish Balay           s[3] = b[i2 + 3];
351e48d15efSToby Isaac           while (nz--) {
352e48d15efSToby Isaac             idx   = 4 * (*vi++);
3539371c9d4SSatish Balay             xw[0] = x[idx];
3549371c9d4SSatish Balay             xw[1] = x[1 + idx];
3559371c9d4SSatish Balay             xw[2] = x[2 + idx];
3569371c9d4SSatish Balay             xw[3] = x[3 + idx];
357e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
358e48d15efSToby Isaac             v += 16;
359e48d15efSToby Isaac           }
3609371c9d4SSatish Balay           t[i2]     = s[0];
3619371c9d4SSatish Balay           t[i2 + 1] = s[1];
3629371c9d4SSatish Balay           t[i2 + 2] = s[2];
3639371c9d4SSatish Balay           t[i2 + 3] = s[3];
364e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
3659371c9d4SSatish Balay           x[i2]     = xw[0];
3669371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3679371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3689371c9d4SSatish Balay           x[i2 + 3] = xw[3];
369e48d15efSToby Isaac           idiag += 16;
370e48d15efSToby Isaac           i2 += 4;
371e48d15efSToby Isaac         }
372e48d15efSToby Isaac         break;
373e48d15efSToby Isaac       case 5:
374e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
3759371c9d4SSatish Balay         t[0] = b[0];
3769371c9d4SSatish Balay         t[1] = b[1];
3779371c9d4SSatish Balay         t[2] = b[2];
3789371c9d4SSatish Balay         t[3] = b[3];
3799371c9d4SSatish Balay         t[4] = b[4];
380e48d15efSToby Isaac         i2   = 5;
381e48d15efSToby Isaac         idiag += 25;
382e48d15efSToby Isaac         for (i = 1; i < m; i++) {
383e48d15efSToby Isaac           v    = aa + 25 * ai[i];
384e48d15efSToby Isaac           vi   = aj + ai[i];
385e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3869371c9d4SSatish Balay           s[0] = b[i2];
3879371c9d4SSatish Balay           s[1] = b[i2 + 1];
3889371c9d4SSatish Balay           s[2] = b[i2 + 2];
3899371c9d4SSatish Balay           s[3] = b[i2 + 3];
3909371c9d4SSatish Balay           s[4] = b[i2 + 4];
391e48d15efSToby Isaac           while (nz--) {
392e48d15efSToby Isaac             idx   = 5 * (*vi++);
3939371c9d4SSatish Balay             xw[0] = x[idx];
3949371c9d4SSatish Balay             xw[1] = x[1 + idx];
3959371c9d4SSatish Balay             xw[2] = x[2 + idx];
3969371c9d4SSatish Balay             xw[3] = x[3 + idx];
3979371c9d4SSatish Balay             xw[4] = x[4 + idx];
398e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
399e48d15efSToby Isaac             v += 25;
400e48d15efSToby Isaac           }
4019371c9d4SSatish Balay           t[i2]     = s[0];
4029371c9d4SSatish Balay           t[i2 + 1] = s[1];
4039371c9d4SSatish Balay           t[i2 + 2] = s[2];
4049371c9d4SSatish Balay           t[i2 + 3] = s[3];
4059371c9d4SSatish Balay           t[i2 + 4] = s[4];
406e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
4079371c9d4SSatish Balay           x[i2]     = xw[0];
4089371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4099371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4109371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4119371c9d4SSatish Balay           x[i2 + 4] = xw[4];
412e48d15efSToby Isaac           idiag += 25;
413e48d15efSToby Isaac           i2 += 5;
414e48d15efSToby Isaac         }
415e48d15efSToby Isaac         break;
416e48d15efSToby Isaac       case 6:
417e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
4189371c9d4SSatish Balay         t[0] = b[0];
4199371c9d4SSatish Balay         t[1] = b[1];
4209371c9d4SSatish Balay         t[2] = b[2];
4219371c9d4SSatish Balay         t[3] = b[3];
4229371c9d4SSatish Balay         t[4] = b[4];
4239371c9d4SSatish Balay         t[5] = b[5];
424e48d15efSToby Isaac         i2   = 6;
425e48d15efSToby Isaac         idiag += 36;
426e48d15efSToby Isaac         for (i = 1; i < m; i++) {
427e48d15efSToby Isaac           v    = aa + 36 * ai[i];
428e48d15efSToby Isaac           vi   = aj + ai[i];
429e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4309371c9d4SSatish Balay           s[0] = b[i2];
4319371c9d4SSatish Balay           s[1] = b[i2 + 1];
4329371c9d4SSatish Balay           s[2] = b[i2 + 2];
4339371c9d4SSatish Balay           s[3] = b[i2 + 3];
4349371c9d4SSatish Balay           s[4] = b[i2 + 4];
4359371c9d4SSatish Balay           s[5] = b[i2 + 5];
436e48d15efSToby Isaac           while (nz--) {
437e48d15efSToby Isaac             idx   = 6 * (*vi++);
4389371c9d4SSatish Balay             xw[0] = x[idx];
4399371c9d4SSatish Balay             xw[1] = x[1 + idx];
4409371c9d4SSatish Balay             xw[2] = x[2 + idx];
4419371c9d4SSatish Balay             xw[3] = x[3 + idx];
4429371c9d4SSatish Balay             xw[4] = x[4 + idx];
4439371c9d4SSatish Balay             xw[5] = x[5 + idx];
444e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
445e48d15efSToby Isaac             v += 36;
446e48d15efSToby Isaac           }
4479371c9d4SSatish Balay           t[i2]     = s[0];
4489371c9d4SSatish Balay           t[i2 + 1] = s[1];
4499371c9d4SSatish Balay           t[i2 + 2] = s[2];
4509371c9d4SSatish Balay           t[i2 + 3] = s[3];
4519371c9d4SSatish Balay           t[i2 + 4] = s[4];
4529371c9d4SSatish Balay           t[i2 + 5] = s[5];
453e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
4549371c9d4SSatish Balay           x[i2]     = xw[0];
4559371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4569371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4579371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4589371c9d4SSatish Balay           x[i2 + 4] = xw[4];
4599371c9d4SSatish Balay           x[i2 + 5] = xw[5];
460e48d15efSToby Isaac           idiag += 36;
461e48d15efSToby Isaac           i2 += 6;
462e48d15efSToby Isaac         }
463e48d15efSToby Isaac         break;
464e48d15efSToby Isaac       case 7:
465e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
4669371c9d4SSatish Balay         t[0] = b[0];
4679371c9d4SSatish Balay         t[1] = b[1];
4689371c9d4SSatish Balay         t[2] = b[2];
4699371c9d4SSatish Balay         t[3] = b[3];
4709371c9d4SSatish Balay         t[4] = b[4];
4719371c9d4SSatish Balay         t[5] = b[5];
4729371c9d4SSatish Balay         t[6] = b[6];
473e48d15efSToby Isaac         i2   = 7;
474e48d15efSToby Isaac         idiag += 49;
475e48d15efSToby Isaac         for (i = 1; i < m; i++) {
476e48d15efSToby Isaac           v    = aa + 49 * ai[i];
477e48d15efSToby Isaac           vi   = aj + ai[i];
478e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4799371c9d4SSatish Balay           s[0] = b[i2];
4809371c9d4SSatish Balay           s[1] = b[i2 + 1];
4819371c9d4SSatish Balay           s[2] = b[i2 + 2];
4829371c9d4SSatish Balay           s[3] = b[i2 + 3];
4839371c9d4SSatish Balay           s[4] = b[i2 + 4];
4849371c9d4SSatish Balay           s[5] = b[i2 + 5];
4859371c9d4SSatish Balay           s[6] = b[i2 + 6];
486e48d15efSToby Isaac           while (nz--) {
487e48d15efSToby Isaac             idx   = 7 * (*vi++);
4889371c9d4SSatish Balay             xw[0] = x[idx];
4899371c9d4SSatish Balay             xw[1] = x[1 + idx];
4909371c9d4SSatish Balay             xw[2] = x[2 + idx];
4919371c9d4SSatish Balay             xw[3] = x[3 + idx];
4929371c9d4SSatish Balay             xw[4] = x[4 + idx];
4939371c9d4SSatish Balay             xw[5] = x[5 + idx];
4949371c9d4SSatish Balay             xw[6] = x[6 + idx];
495e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
496e48d15efSToby Isaac             v += 49;
497e48d15efSToby Isaac           }
4989371c9d4SSatish Balay           t[i2]     = s[0];
4999371c9d4SSatish Balay           t[i2 + 1] = s[1];
5009371c9d4SSatish Balay           t[i2 + 2] = s[2];
5019371c9d4SSatish Balay           t[i2 + 3] = s[3];
5029371c9d4SSatish Balay           t[i2 + 4] = s[4];
5039371c9d4SSatish Balay           t[i2 + 5] = s[5];
5049371c9d4SSatish Balay           t[i2 + 6] = s[6];
505e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
5069371c9d4SSatish Balay           x[i2]     = xw[0];
5079371c9d4SSatish Balay           x[i2 + 1] = xw[1];
5089371c9d4SSatish Balay           x[i2 + 2] = xw[2];
5099371c9d4SSatish Balay           x[i2 + 3] = xw[3];
5109371c9d4SSatish Balay           x[i2 + 4] = xw[4];
5119371c9d4SSatish Balay           x[i2 + 5] = xw[5];
5129371c9d4SSatish Balay           x[i2 + 6] = xw[6];
513e48d15efSToby Isaac           idiag += 49;
514e48d15efSToby Isaac           i2 += 7;
515e48d15efSToby Isaac         }
516e48d15efSToby Isaac         break;
517e48d15efSToby Isaac       default:
51896b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
5199566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(t, b, bs));
520de80f912SBarry Smith         i2 = bs;
521de80f912SBarry Smith         idiag += bs2;
522de80f912SBarry Smith         for (i = 1; i < m; i++) {
523de80f912SBarry Smith           v  = aa + bs2 * ai[i];
524de80f912SBarry Smith           vi = aj + ai[i];
525de80f912SBarry Smith           nz = diag[i] - ai[i];
526de80f912SBarry Smith 
5279566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
528de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
529de80f912SBarry Smith           workt = work;
530de80f912SBarry Smith           for (j = 0; j < nz; j++) {
5319566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
532de80f912SBarry Smith             workt += bs;
533de80f912SBarry Smith           }
53496b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
5359566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(t + i2, w, bs));
53696b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
537de80f912SBarry Smith 
538de80f912SBarry Smith           idiag += bs2;
539de80f912SBarry Smith           i2 += bs;
540de80f912SBarry Smith         }
541e48d15efSToby Isaac         break;
542e48d15efSToby Isaac       }
543de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
5449566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
545e48d15efSToby Isaac       xb = t;
5469371c9d4SSatish Balay     } else xb = b;
547de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
548e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
549e48d15efSToby Isaac       i2    = bs * (m - 1);
550e48d15efSToby Isaac       switch (bs) {
551e48d15efSToby Isaac       case 1:
552e48d15efSToby Isaac         s[0] = xb[i2];
553e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
554e48d15efSToby Isaac         x[i2] = xw[0];
555e48d15efSToby Isaac         i2 -= 1;
556e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
557e48d15efSToby Isaac           v    = aa + (diag[i] + 1);
558e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
559e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
560e48d15efSToby Isaac           s[0] = xb[i2];
561e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
562e48d15efSToby Isaac             xw[0] = x[vi[j]];
563e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
564e48d15efSToby Isaac           }
565e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
566e48d15efSToby Isaac           x[i2] = xw[0];
567e48d15efSToby Isaac           idiag -= 1;
568e48d15efSToby Isaac           i2 -= 1;
569e48d15efSToby Isaac         }
570e48d15efSToby Isaac         break;
571e48d15efSToby Isaac       case 2:
5729371c9d4SSatish Balay         s[0] = xb[i2];
5739371c9d4SSatish Balay         s[1] = xb[i2 + 1];
574e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5759371c9d4SSatish Balay         x[i2]     = xw[0];
5769371c9d4SSatish Balay         x[i2 + 1] = xw[1];
577e48d15efSToby Isaac         i2 -= 2;
578e48d15efSToby Isaac         idiag -= 4;
579e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
580e48d15efSToby Isaac           v    = aa + 4 * (diag[i] + 1);
581e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
582e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
5839371c9d4SSatish Balay           s[0] = xb[i2];
5849371c9d4SSatish Balay           s[1] = xb[i2 + 1];
585e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
586e48d15efSToby Isaac             idx   = 2 * vi[j];
587e48d15efSToby Isaac             it    = 4 * j;
5889371c9d4SSatish Balay             xw[0] = x[idx];
5899371c9d4SSatish Balay             xw[1] = x[1 + idx];
590e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
591e48d15efSToby Isaac           }
592e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5939371c9d4SSatish Balay           x[i2]     = xw[0];
5949371c9d4SSatish Balay           x[i2 + 1] = xw[1];
595e48d15efSToby Isaac           idiag -= 4;
596e48d15efSToby Isaac           i2 -= 2;
597e48d15efSToby Isaac         }
598e48d15efSToby Isaac         break;
599e48d15efSToby Isaac       case 3:
6009371c9d4SSatish Balay         s[0] = xb[i2];
6019371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6029371c9d4SSatish Balay         s[2] = xb[i2 + 2];
603e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6049371c9d4SSatish Balay         x[i2]     = xw[0];
6059371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6069371c9d4SSatish Balay         x[i2 + 2] = xw[2];
607e48d15efSToby Isaac         i2 -= 3;
608e48d15efSToby Isaac         idiag -= 9;
609e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
610e48d15efSToby Isaac           v    = aa + 9 * (diag[i] + 1);
611e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
612e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6139371c9d4SSatish Balay           s[0] = xb[i2];
6149371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6159371c9d4SSatish Balay           s[2] = xb[i2 + 2];
616e48d15efSToby Isaac           while (nz--) {
617e48d15efSToby Isaac             idx   = 3 * (*vi++);
6189371c9d4SSatish Balay             xw[0] = x[idx];
6199371c9d4SSatish Balay             xw[1] = x[1 + idx];
6209371c9d4SSatish Balay             xw[2] = x[2 + idx];
621e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
622e48d15efSToby Isaac             v += 9;
623e48d15efSToby Isaac           }
624e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6259371c9d4SSatish Balay           x[i2]     = xw[0];
6269371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6279371c9d4SSatish Balay           x[i2 + 2] = xw[2];
628e48d15efSToby Isaac           idiag -= 9;
629e48d15efSToby Isaac           i2 -= 3;
630e48d15efSToby Isaac         }
631e48d15efSToby Isaac         break;
632e48d15efSToby Isaac       case 4:
6339371c9d4SSatish Balay         s[0] = xb[i2];
6349371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6359371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6369371c9d4SSatish Balay         s[3] = xb[i2 + 3];
637e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6389371c9d4SSatish Balay         x[i2]     = xw[0];
6399371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6409371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6419371c9d4SSatish Balay         x[i2 + 3] = xw[3];
642e48d15efSToby Isaac         i2 -= 4;
643e48d15efSToby Isaac         idiag -= 16;
644e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
645e48d15efSToby Isaac           v    = aa + 16 * (diag[i] + 1);
646e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
647e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6489371c9d4SSatish Balay           s[0] = xb[i2];
6499371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6509371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6519371c9d4SSatish Balay           s[3] = xb[i2 + 3];
652e48d15efSToby Isaac           while (nz--) {
653e48d15efSToby Isaac             idx   = 4 * (*vi++);
6549371c9d4SSatish Balay             xw[0] = x[idx];
6559371c9d4SSatish Balay             xw[1] = x[1 + idx];
6569371c9d4SSatish Balay             xw[2] = x[2 + idx];
6579371c9d4SSatish Balay             xw[3] = x[3 + idx];
658e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
659e48d15efSToby Isaac             v += 16;
660e48d15efSToby Isaac           }
661e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6629371c9d4SSatish Balay           x[i2]     = xw[0];
6639371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6649371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6659371c9d4SSatish Balay           x[i2 + 3] = xw[3];
666e48d15efSToby Isaac           idiag -= 16;
667e48d15efSToby Isaac           i2 -= 4;
668e48d15efSToby Isaac         }
669e48d15efSToby Isaac         break;
670e48d15efSToby Isaac       case 5:
6719371c9d4SSatish Balay         s[0] = xb[i2];
6729371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6739371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6749371c9d4SSatish Balay         s[3] = xb[i2 + 3];
6759371c9d4SSatish Balay         s[4] = xb[i2 + 4];
676e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6779371c9d4SSatish Balay         x[i2]     = xw[0];
6789371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6799371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6809371c9d4SSatish Balay         x[i2 + 3] = xw[3];
6819371c9d4SSatish Balay         x[i2 + 4] = xw[4];
682e48d15efSToby Isaac         i2 -= 5;
683e48d15efSToby Isaac         idiag -= 25;
684e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
685e48d15efSToby Isaac           v    = aa + 25 * (diag[i] + 1);
686e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
687e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6889371c9d4SSatish Balay           s[0] = xb[i2];
6899371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6909371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6919371c9d4SSatish Balay           s[3] = xb[i2 + 3];
6929371c9d4SSatish Balay           s[4] = xb[i2 + 4];
693e48d15efSToby Isaac           while (nz--) {
694e48d15efSToby Isaac             idx   = 5 * (*vi++);
6959371c9d4SSatish Balay             xw[0] = x[idx];
6969371c9d4SSatish Balay             xw[1] = x[1 + idx];
6979371c9d4SSatish Balay             xw[2] = x[2 + idx];
6989371c9d4SSatish Balay             xw[3] = x[3 + idx];
6999371c9d4SSatish Balay             xw[4] = x[4 + idx];
700e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
701e48d15efSToby Isaac             v += 25;
702e48d15efSToby Isaac           }
703e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
7049371c9d4SSatish Balay           x[i2]     = xw[0];
7059371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7069371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7079371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7089371c9d4SSatish Balay           x[i2 + 4] = xw[4];
709e48d15efSToby Isaac           idiag -= 25;
710e48d15efSToby Isaac           i2 -= 5;
711e48d15efSToby Isaac         }
712e48d15efSToby Isaac         break;
713e48d15efSToby Isaac       case 6:
7149371c9d4SSatish Balay         s[0] = xb[i2];
7159371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7169371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7179371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7189371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7199371c9d4SSatish Balay         s[5] = xb[i2 + 5];
720e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7219371c9d4SSatish Balay         x[i2]     = xw[0];
7229371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7239371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7249371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7259371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7269371c9d4SSatish Balay         x[i2 + 5] = xw[5];
727e48d15efSToby Isaac         i2 -= 6;
728e48d15efSToby Isaac         idiag -= 36;
729e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
730e48d15efSToby Isaac           v    = aa + 36 * (diag[i] + 1);
731e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
732e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7339371c9d4SSatish Balay           s[0] = xb[i2];
7349371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7359371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7369371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7379371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7389371c9d4SSatish Balay           s[5] = xb[i2 + 5];
739e48d15efSToby Isaac           while (nz--) {
740e48d15efSToby Isaac             idx   = 6 * (*vi++);
7419371c9d4SSatish Balay             xw[0] = x[idx];
7429371c9d4SSatish Balay             xw[1] = x[1 + idx];
7439371c9d4SSatish Balay             xw[2] = x[2 + idx];
7449371c9d4SSatish Balay             xw[3] = x[3 + idx];
7459371c9d4SSatish Balay             xw[4] = x[4 + idx];
7469371c9d4SSatish Balay             xw[5] = x[5 + idx];
747e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
748e48d15efSToby Isaac             v += 36;
749e48d15efSToby Isaac           }
750e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7519371c9d4SSatish Balay           x[i2]     = xw[0];
7529371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7539371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7549371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7559371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7569371c9d4SSatish Balay           x[i2 + 5] = xw[5];
757e48d15efSToby Isaac           idiag -= 36;
758e48d15efSToby Isaac           i2 -= 6;
759e48d15efSToby Isaac         }
760e48d15efSToby Isaac         break;
761e48d15efSToby Isaac       case 7:
7629371c9d4SSatish Balay         s[0] = xb[i2];
7639371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7649371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7659371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7669371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7679371c9d4SSatish Balay         s[5] = xb[i2 + 5];
7689371c9d4SSatish Balay         s[6] = xb[i2 + 6];
769e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
7709371c9d4SSatish Balay         x[i2]     = xw[0];
7719371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7729371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7739371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7749371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7759371c9d4SSatish Balay         x[i2 + 5] = xw[5];
7769371c9d4SSatish Balay         x[i2 + 6] = xw[6];
777e48d15efSToby Isaac         i2 -= 7;
778e48d15efSToby Isaac         idiag -= 49;
779e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
780e48d15efSToby Isaac           v    = aa + 49 * (diag[i] + 1);
781e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
782e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7839371c9d4SSatish Balay           s[0] = xb[i2];
7849371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7859371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7869371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7879371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7889371c9d4SSatish Balay           s[5] = xb[i2 + 5];
7899371c9d4SSatish Balay           s[6] = xb[i2 + 6];
790e48d15efSToby Isaac           while (nz--) {
791e48d15efSToby Isaac             idx   = 7 * (*vi++);
7929371c9d4SSatish Balay             xw[0] = x[idx];
7939371c9d4SSatish Balay             xw[1] = x[1 + idx];
7949371c9d4SSatish Balay             xw[2] = x[2 + idx];
7959371c9d4SSatish Balay             xw[3] = x[3 + idx];
7969371c9d4SSatish Balay             xw[4] = x[4 + idx];
7979371c9d4SSatish Balay             xw[5] = x[5 + idx];
7989371c9d4SSatish Balay             xw[6] = x[6 + idx];
799e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
800e48d15efSToby Isaac             v += 49;
801e48d15efSToby Isaac           }
802e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
8039371c9d4SSatish Balay           x[i2]     = xw[0];
8049371c9d4SSatish Balay           x[i2 + 1] = xw[1];
8059371c9d4SSatish Balay           x[i2 + 2] = xw[2];
8069371c9d4SSatish Balay           x[i2 + 3] = xw[3];
8079371c9d4SSatish Balay           x[i2 + 4] = xw[4];
8089371c9d4SSatish Balay           x[i2 + 5] = xw[5];
8099371c9d4SSatish Balay           x[i2 + 6] = xw[6];
810e48d15efSToby Isaac           idiag -= 49;
811e48d15efSToby Isaac           i2 -= 7;
812e48d15efSToby Isaac         }
813e48d15efSToby Isaac         break;
814e48d15efSToby Isaac       default:
8159566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(w, xb + i2, bs));
81696b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
817de80f912SBarry Smith         i2 -= bs;
818e48d15efSToby Isaac         idiag -= bs2;
819de80f912SBarry Smith         for (i = m - 2; i >= 0; i--) {
820de80f912SBarry Smith           v  = aa + bs2 * (diag[i] + 1);
821de80f912SBarry Smith           vi = aj + diag[i] + 1;
822de80f912SBarry Smith           nz = ai[i + 1] - diag[i] - 1;
823de80f912SBarry Smith 
8249566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, xb + i2, bs));
825de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
826de80f912SBarry Smith           workt = work;
827de80f912SBarry Smith           for (j = 0; j < nz; j++) {
8289566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
829de80f912SBarry Smith             workt += bs;
830de80f912SBarry Smith           }
83196b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
83296b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
833e48d15efSToby Isaac 
834de80f912SBarry Smith           idiag -= bs2;
835de80f912SBarry Smith           i2 -= bs;
836de80f912SBarry Smith         }
837e48d15efSToby Isaac         break;
838e48d15efSToby Isaac       }
8399566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
840de80f912SBarry Smith     }
841e48d15efSToby Isaac     its--;
842e48d15efSToby Isaac   }
843e48d15efSToby Isaac   while (its--) {
844e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
845e48d15efSToby Isaac       idiag = a->idiag;
846e48d15efSToby Isaac       i2    = 0;
847e48d15efSToby Isaac       switch (bs) {
848e48d15efSToby Isaac       case 1:
849e48d15efSToby Isaac         for (i = 0; i < m; i++) {
850e48d15efSToby Isaac           v    = aa + ai[i];
851e48d15efSToby Isaac           vi   = aj + ai[i];
852e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
853e48d15efSToby Isaac           s[0] = b[i2];
854e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
855e48d15efSToby Isaac             xw[0] = x[vi[j]];
856e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
857e48d15efSToby Isaac           }
858e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
859e48d15efSToby Isaac           x[i2] += xw[0];
860e48d15efSToby Isaac           idiag += 1;
861e48d15efSToby Isaac           i2 += 1;
862e48d15efSToby Isaac         }
863e48d15efSToby Isaac         break;
864e48d15efSToby Isaac       case 2:
865e48d15efSToby Isaac         for (i = 0; i < m; i++) {
866e48d15efSToby Isaac           v    = aa + 4 * ai[i];
867e48d15efSToby Isaac           vi   = aj + ai[i];
868e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8699371c9d4SSatish Balay           s[0] = b[i2];
8709371c9d4SSatish Balay           s[1] = b[i2 + 1];
871e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
872e48d15efSToby Isaac             idx   = 2 * vi[j];
873e48d15efSToby Isaac             it    = 4 * j;
8749371c9d4SSatish Balay             xw[0] = x[idx];
8759371c9d4SSatish Balay             xw[1] = x[1 + idx];
876e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
877e48d15efSToby Isaac           }
878e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
8799371c9d4SSatish Balay           x[i2] += xw[0];
8809371c9d4SSatish Balay           x[i2 + 1] += xw[1];
881e48d15efSToby Isaac           idiag += 4;
882e48d15efSToby Isaac           i2 += 2;
883e48d15efSToby Isaac         }
884e48d15efSToby Isaac         break;
885e48d15efSToby Isaac       case 3:
886e48d15efSToby Isaac         for (i = 0; i < m; i++) {
887e48d15efSToby Isaac           v    = aa + 9 * ai[i];
888e48d15efSToby Isaac           vi   = aj + ai[i];
889e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8909371c9d4SSatish Balay           s[0] = b[i2];
8919371c9d4SSatish Balay           s[1] = b[i2 + 1];
8929371c9d4SSatish Balay           s[2] = b[i2 + 2];
893e48d15efSToby Isaac           while (nz--) {
894e48d15efSToby Isaac             idx   = 3 * (*vi++);
8959371c9d4SSatish Balay             xw[0] = x[idx];
8969371c9d4SSatish Balay             xw[1] = x[1 + idx];
8979371c9d4SSatish Balay             xw[2] = x[2 + idx];
898e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
899e48d15efSToby Isaac             v += 9;
900e48d15efSToby Isaac           }
901e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
9029371c9d4SSatish Balay           x[i2] += xw[0];
9039371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9049371c9d4SSatish Balay           x[i2 + 2] += xw[2];
905e48d15efSToby Isaac           idiag += 9;
906e48d15efSToby Isaac           i2 += 3;
907e48d15efSToby Isaac         }
908e48d15efSToby Isaac         break;
909e48d15efSToby Isaac       case 4:
910e48d15efSToby Isaac         for (i = 0; i < m; i++) {
911e48d15efSToby Isaac           v    = aa + 16 * ai[i];
912e48d15efSToby Isaac           vi   = aj + ai[i];
913e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9149371c9d4SSatish Balay           s[0] = b[i2];
9159371c9d4SSatish Balay           s[1] = b[i2 + 1];
9169371c9d4SSatish Balay           s[2] = b[i2 + 2];
9179371c9d4SSatish Balay           s[3] = b[i2 + 3];
918e48d15efSToby Isaac           while (nz--) {
919e48d15efSToby Isaac             idx   = 4 * (*vi++);
9209371c9d4SSatish Balay             xw[0] = x[idx];
9219371c9d4SSatish Balay             xw[1] = x[1 + idx];
9229371c9d4SSatish Balay             xw[2] = x[2 + idx];
9239371c9d4SSatish Balay             xw[3] = x[3 + idx];
924e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
925e48d15efSToby Isaac             v += 16;
926e48d15efSToby Isaac           }
927e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
9289371c9d4SSatish Balay           x[i2] += xw[0];
9299371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9309371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9319371c9d4SSatish Balay           x[i2 + 3] += xw[3];
932e48d15efSToby Isaac           idiag += 16;
933e48d15efSToby Isaac           i2 += 4;
934e48d15efSToby Isaac         }
935e48d15efSToby Isaac         break;
936e48d15efSToby Isaac       case 5:
937e48d15efSToby Isaac         for (i = 0; i < m; i++) {
938e48d15efSToby Isaac           v    = aa + 25 * ai[i];
939e48d15efSToby Isaac           vi   = aj + ai[i];
940e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9419371c9d4SSatish Balay           s[0] = b[i2];
9429371c9d4SSatish Balay           s[1] = b[i2 + 1];
9439371c9d4SSatish Balay           s[2] = b[i2 + 2];
9449371c9d4SSatish Balay           s[3] = b[i2 + 3];
9459371c9d4SSatish Balay           s[4] = b[i2 + 4];
946e48d15efSToby Isaac           while (nz--) {
947e48d15efSToby Isaac             idx   = 5 * (*vi++);
9489371c9d4SSatish Balay             xw[0] = x[idx];
9499371c9d4SSatish Balay             xw[1] = x[1 + idx];
9509371c9d4SSatish Balay             xw[2] = x[2 + idx];
9519371c9d4SSatish Balay             xw[3] = x[3 + idx];
9529371c9d4SSatish Balay             xw[4] = x[4 + idx];
953e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
954e48d15efSToby Isaac             v += 25;
955e48d15efSToby Isaac           }
956e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
9579371c9d4SSatish Balay           x[i2] += xw[0];
9589371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9599371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9609371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9619371c9d4SSatish Balay           x[i2 + 4] += xw[4];
962e48d15efSToby Isaac           idiag += 25;
963e48d15efSToby Isaac           i2 += 5;
964e48d15efSToby Isaac         }
965e48d15efSToby Isaac         break;
966e48d15efSToby Isaac       case 6:
967e48d15efSToby Isaac         for (i = 0; i < m; i++) {
968e48d15efSToby Isaac           v    = aa + 36 * ai[i];
969e48d15efSToby Isaac           vi   = aj + ai[i];
970e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9719371c9d4SSatish Balay           s[0] = b[i2];
9729371c9d4SSatish Balay           s[1] = b[i2 + 1];
9739371c9d4SSatish Balay           s[2] = b[i2 + 2];
9749371c9d4SSatish Balay           s[3] = b[i2 + 3];
9759371c9d4SSatish Balay           s[4] = b[i2 + 4];
9769371c9d4SSatish Balay           s[5] = b[i2 + 5];
977e48d15efSToby Isaac           while (nz--) {
978e48d15efSToby Isaac             idx   = 6 * (*vi++);
9799371c9d4SSatish Balay             xw[0] = x[idx];
9809371c9d4SSatish Balay             xw[1] = x[1 + idx];
9819371c9d4SSatish Balay             xw[2] = x[2 + idx];
9829371c9d4SSatish Balay             xw[3] = x[3 + idx];
9839371c9d4SSatish Balay             xw[4] = x[4 + idx];
9849371c9d4SSatish Balay             xw[5] = x[5 + idx];
985e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
986e48d15efSToby Isaac             v += 36;
987e48d15efSToby Isaac           }
988e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
9899371c9d4SSatish Balay           x[i2] += xw[0];
9909371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9919371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9929371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9939371c9d4SSatish Balay           x[i2 + 4] += xw[4];
9949371c9d4SSatish Balay           x[i2 + 5] += xw[5];
995e48d15efSToby Isaac           idiag += 36;
996e48d15efSToby Isaac           i2 += 6;
997e48d15efSToby Isaac         }
998e48d15efSToby Isaac         break;
999e48d15efSToby Isaac       case 7:
1000e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1001e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1002e48d15efSToby Isaac           vi   = aj + ai[i];
1003e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10049371c9d4SSatish Balay           s[0] = b[i2];
10059371c9d4SSatish Balay           s[1] = b[i2 + 1];
10069371c9d4SSatish Balay           s[2] = b[i2 + 2];
10079371c9d4SSatish Balay           s[3] = b[i2 + 3];
10089371c9d4SSatish Balay           s[4] = b[i2 + 4];
10099371c9d4SSatish Balay           s[5] = b[i2 + 5];
10109371c9d4SSatish Balay           s[6] = b[i2 + 6];
1011e48d15efSToby Isaac           while (nz--) {
1012e48d15efSToby Isaac             idx   = 7 * (*vi++);
10139371c9d4SSatish Balay             xw[0] = x[idx];
10149371c9d4SSatish Balay             xw[1] = x[1 + idx];
10159371c9d4SSatish Balay             xw[2] = x[2 + idx];
10169371c9d4SSatish Balay             xw[3] = x[3 + idx];
10179371c9d4SSatish Balay             xw[4] = x[4 + idx];
10189371c9d4SSatish Balay             xw[5] = x[5 + idx];
10199371c9d4SSatish Balay             xw[6] = x[6 + idx];
1020e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1021e48d15efSToby Isaac             v += 49;
1022e48d15efSToby Isaac           }
1023e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
10249371c9d4SSatish Balay           x[i2] += xw[0];
10259371c9d4SSatish Balay           x[i2 + 1] += xw[1];
10269371c9d4SSatish Balay           x[i2 + 2] += xw[2];
10279371c9d4SSatish Balay           x[i2 + 3] += xw[3];
10289371c9d4SSatish Balay           x[i2 + 4] += xw[4];
10299371c9d4SSatish Balay           x[i2 + 5] += xw[5];
10309371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1031e48d15efSToby Isaac           idiag += 49;
1032e48d15efSToby Isaac           i2 += 7;
1033e48d15efSToby Isaac         }
1034e48d15efSToby Isaac         break;
1035e48d15efSToby Isaac       default:
1036e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1037e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1038e48d15efSToby Isaac           vi = aj + ai[i];
1039e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1040e48d15efSToby Isaac 
10419566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1042e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1043e48d15efSToby Isaac           workt = work;
1044e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
10459566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1046e48d15efSToby Isaac             workt += bs;
1047e48d15efSToby Isaac           }
1048e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1049e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1050e48d15efSToby Isaac 
1051e48d15efSToby Isaac           idiag += bs2;
1052e48d15efSToby Isaac           i2 += bs;
1053e48d15efSToby Isaac         }
1054e48d15efSToby Isaac         break;
1055e48d15efSToby Isaac       }
10569566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1057e48d15efSToby Isaac     }
1058e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1059e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
1060e48d15efSToby Isaac       i2    = bs * (m - 1);
1061e48d15efSToby Isaac       switch (bs) {
1062e48d15efSToby Isaac       case 1:
1063e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1064e48d15efSToby Isaac           v    = aa + ai[i];
1065e48d15efSToby Isaac           vi   = aj + ai[i];
1066e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
1067e48d15efSToby Isaac           s[0] = b[i2];
1068e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1069e48d15efSToby Isaac             xw[0] = x[vi[j]];
1070e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1071e48d15efSToby Isaac           }
1072e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1073e48d15efSToby Isaac           x[i2] += xw[0];
1074e48d15efSToby Isaac           idiag -= 1;
1075e48d15efSToby Isaac           i2 -= 1;
1076e48d15efSToby Isaac         }
1077e48d15efSToby Isaac         break;
1078e48d15efSToby Isaac       case 2:
1079e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1080e48d15efSToby Isaac           v    = aa + 4 * ai[i];
1081e48d15efSToby Isaac           vi   = aj + ai[i];
1082e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10839371c9d4SSatish Balay           s[0] = b[i2];
10849371c9d4SSatish Balay           s[1] = b[i2 + 1];
1085e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1086e48d15efSToby Isaac             idx   = 2 * vi[j];
1087e48d15efSToby Isaac             it    = 4 * j;
10889371c9d4SSatish Balay             xw[0] = x[idx];
10899371c9d4SSatish Balay             xw[1] = x[1 + idx];
1090e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1091e48d15efSToby Isaac           }
1092e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
10939371c9d4SSatish Balay           x[i2] += xw[0];
10949371c9d4SSatish Balay           x[i2 + 1] += xw[1];
1095e48d15efSToby Isaac           idiag -= 4;
1096e48d15efSToby Isaac           i2 -= 2;
1097e48d15efSToby Isaac         }
1098e48d15efSToby Isaac         break;
1099e48d15efSToby Isaac       case 3:
1100e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1101e48d15efSToby Isaac           v    = aa + 9 * ai[i];
1102e48d15efSToby Isaac           vi   = aj + ai[i];
1103e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11049371c9d4SSatish Balay           s[0] = b[i2];
11059371c9d4SSatish Balay           s[1] = b[i2 + 1];
11069371c9d4SSatish Balay           s[2] = b[i2 + 2];
1107e48d15efSToby Isaac           while (nz--) {
1108e48d15efSToby Isaac             idx   = 3 * (*vi++);
11099371c9d4SSatish Balay             xw[0] = x[idx];
11109371c9d4SSatish Balay             xw[1] = x[1 + idx];
11119371c9d4SSatish Balay             xw[2] = x[2 + idx];
1112e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1113e48d15efSToby Isaac             v += 9;
1114e48d15efSToby Isaac           }
1115e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
11169371c9d4SSatish Balay           x[i2] += xw[0];
11179371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11189371c9d4SSatish Balay           x[i2 + 2] += xw[2];
1119e48d15efSToby Isaac           idiag -= 9;
1120e48d15efSToby Isaac           i2 -= 3;
1121e48d15efSToby Isaac         }
1122e48d15efSToby Isaac         break;
1123e48d15efSToby Isaac       case 4:
1124e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1125e48d15efSToby Isaac           v    = aa + 16 * ai[i];
1126e48d15efSToby Isaac           vi   = aj + ai[i];
1127e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11289371c9d4SSatish Balay           s[0] = b[i2];
11299371c9d4SSatish Balay           s[1] = b[i2 + 1];
11309371c9d4SSatish Balay           s[2] = b[i2 + 2];
11319371c9d4SSatish Balay           s[3] = b[i2 + 3];
1132e48d15efSToby Isaac           while (nz--) {
1133e48d15efSToby Isaac             idx   = 4 * (*vi++);
11349371c9d4SSatish Balay             xw[0] = x[idx];
11359371c9d4SSatish Balay             xw[1] = x[1 + idx];
11369371c9d4SSatish Balay             xw[2] = x[2 + idx];
11379371c9d4SSatish Balay             xw[3] = x[3 + idx];
1138e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1139e48d15efSToby Isaac             v += 16;
1140e48d15efSToby Isaac           }
1141e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
11429371c9d4SSatish Balay           x[i2] += xw[0];
11439371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11449371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11459371c9d4SSatish Balay           x[i2 + 3] += xw[3];
1146e48d15efSToby Isaac           idiag -= 16;
1147e48d15efSToby Isaac           i2 -= 4;
1148e48d15efSToby Isaac         }
1149e48d15efSToby Isaac         break;
1150e48d15efSToby Isaac       case 5:
1151e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1152e48d15efSToby Isaac           v    = aa + 25 * ai[i];
1153e48d15efSToby Isaac           vi   = aj + ai[i];
1154e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11559371c9d4SSatish Balay           s[0] = b[i2];
11569371c9d4SSatish Balay           s[1] = b[i2 + 1];
11579371c9d4SSatish Balay           s[2] = b[i2 + 2];
11589371c9d4SSatish Balay           s[3] = b[i2 + 3];
11599371c9d4SSatish Balay           s[4] = b[i2 + 4];
1160e48d15efSToby Isaac           while (nz--) {
1161e48d15efSToby Isaac             idx   = 5 * (*vi++);
11629371c9d4SSatish Balay             xw[0] = x[idx];
11639371c9d4SSatish Balay             xw[1] = x[1 + idx];
11649371c9d4SSatish Balay             xw[2] = x[2 + idx];
11659371c9d4SSatish Balay             xw[3] = x[3 + idx];
11669371c9d4SSatish Balay             xw[4] = x[4 + idx];
1167e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1168e48d15efSToby Isaac             v += 25;
1169e48d15efSToby Isaac           }
1170e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
11719371c9d4SSatish Balay           x[i2] += xw[0];
11729371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11739371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11749371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11759371c9d4SSatish Balay           x[i2 + 4] += xw[4];
1176e48d15efSToby Isaac           idiag -= 25;
1177e48d15efSToby Isaac           i2 -= 5;
1178e48d15efSToby Isaac         }
1179e48d15efSToby Isaac         break;
1180e48d15efSToby Isaac       case 6:
1181e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1182e48d15efSToby Isaac           v    = aa + 36 * ai[i];
1183e48d15efSToby Isaac           vi   = aj + ai[i];
1184e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11859371c9d4SSatish Balay           s[0] = b[i2];
11869371c9d4SSatish Balay           s[1] = b[i2 + 1];
11879371c9d4SSatish Balay           s[2] = b[i2 + 2];
11889371c9d4SSatish Balay           s[3] = b[i2 + 3];
11899371c9d4SSatish Balay           s[4] = b[i2 + 4];
11909371c9d4SSatish Balay           s[5] = b[i2 + 5];
1191e48d15efSToby Isaac           while (nz--) {
1192e48d15efSToby Isaac             idx   = 6 * (*vi++);
11939371c9d4SSatish Balay             xw[0] = x[idx];
11949371c9d4SSatish Balay             xw[1] = x[1 + idx];
11959371c9d4SSatish Balay             xw[2] = x[2 + idx];
11969371c9d4SSatish Balay             xw[3] = x[3 + idx];
11979371c9d4SSatish Balay             xw[4] = x[4 + idx];
11989371c9d4SSatish Balay             xw[5] = x[5 + idx];
1199e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1200e48d15efSToby Isaac             v += 36;
1201e48d15efSToby Isaac           }
1202e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
12039371c9d4SSatish Balay           x[i2] += xw[0];
12049371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12059371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12069371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12079371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12089371c9d4SSatish Balay           x[i2 + 5] += xw[5];
1209e48d15efSToby Isaac           idiag -= 36;
1210e48d15efSToby Isaac           i2 -= 6;
1211e48d15efSToby Isaac         }
1212e48d15efSToby Isaac         break;
1213e48d15efSToby Isaac       case 7:
1214e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1215e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1216e48d15efSToby Isaac           vi   = aj + ai[i];
1217e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
12189371c9d4SSatish Balay           s[0] = b[i2];
12199371c9d4SSatish Balay           s[1] = b[i2 + 1];
12209371c9d4SSatish Balay           s[2] = b[i2 + 2];
12219371c9d4SSatish Balay           s[3] = b[i2 + 3];
12229371c9d4SSatish Balay           s[4] = b[i2 + 4];
12239371c9d4SSatish Balay           s[5] = b[i2 + 5];
12249371c9d4SSatish Balay           s[6] = b[i2 + 6];
1225e48d15efSToby Isaac           while (nz--) {
1226e48d15efSToby Isaac             idx   = 7 * (*vi++);
12279371c9d4SSatish Balay             xw[0] = x[idx];
12289371c9d4SSatish Balay             xw[1] = x[1 + idx];
12299371c9d4SSatish Balay             xw[2] = x[2 + idx];
12309371c9d4SSatish Balay             xw[3] = x[3 + idx];
12319371c9d4SSatish Balay             xw[4] = x[4 + idx];
12329371c9d4SSatish Balay             xw[5] = x[5 + idx];
12339371c9d4SSatish Balay             xw[6] = x[6 + idx];
1234e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1235e48d15efSToby Isaac             v += 49;
1236e48d15efSToby Isaac           }
1237e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
12389371c9d4SSatish Balay           x[i2] += xw[0];
12399371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12409371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12419371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12429371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12439371c9d4SSatish Balay           x[i2 + 5] += xw[5];
12449371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1245e48d15efSToby Isaac           idiag -= 49;
1246e48d15efSToby Isaac           i2 -= 7;
1247e48d15efSToby Isaac         }
1248e48d15efSToby Isaac         break;
1249e48d15efSToby Isaac       default:
1250e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1251e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1252e48d15efSToby Isaac           vi = aj + ai[i];
1253e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1254e48d15efSToby Isaac 
12559566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1256e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1257e48d15efSToby Isaac           workt = work;
1258e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
12599566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1260e48d15efSToby Isaac             workt += bs;
1261e48d15efSToby Isaac           }
1262e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1263e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1264e48d15efSToby Isaac 
1265e48d15efSToby Isaac           idiag -= bs2;
1266e48d15efSToby Isaac           i2 -= bs;
1267e48d15efSToby Isaac         }
1268e48d15efSToby Isaac         break;
1269e48d15efSToby Isaac       }
12709566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1271e48d15efSToby Isaac     }
1272e48d15efSToby Isaac   }
12739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1276de80f912SBarry Smith }
1277de80f912SBarry Smith 
1278af674e45SBarry Smith /*
127981824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1280af674e45SBarry Smith */
1281af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1282af674e45SBarry Smith   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1283af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1284af674e45SBarry Smith   #define matsetvaluesblocked4_ matsetvaluesblocked4
1285af674e45SBarry Smith #endif
1286af674e45SBarry Smith 
1287d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1288d71ae5a4SJacob Faibussowitsch {
1289af674e45SBarry Smith   Mat                A = *AA;
1290af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1291c1ac3661SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1292c1ac3661SBarry Smith   PetscInt          *ai = a->i, *ailen = a->ilen;
129317ec6a02SBarry Smith   PetscInt          *aj = a->j, stepval, lastcol = -1;
1294f15d580aSBarry Smith   const PetscScalar *value = v;
12954bb09213Spetsc   MatScalar         *ap, *aa = a->a, *bap;
1296af674e45SBarry Smith 
1297af674e45SBarry Smith   PetscFunctionBegin;
1298ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1299af674e45SBarry Smith   stepval = (n - 1) * 4;
1300af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
1301af674e45SBarry Smith     row  = im[k];
1302af674e45SBarry Smith     rp   = aj + ai[row];
1303af674e45SBarry Smith     ap   = aa + 16 * ai[row];
1304af674e45SBarry Smith     nrow = ailen[row];
1305af674e45SBarry Smith     low  = 0;
130617ec6a02SBarry Smith     high = nrow;
1307af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
1308af674e45SBarry Smith       col = in[l];
1309db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1310db4deed7SKarl Rupp       else high = nrow;
131117ec6a02SBarry Smith       lastcol = col;
13121e3347e8SBarry Smith       value   = v + k * (stepval + 4 + l) * 4;
1313af674e45SBarry Smith       while (high - low > 7) {
1314af674e45SBarry Smith         t = (low + high) / 2;
1315af674e45SBarry Smith         if (rp[t] > col) high = t;
1316af674e45SBarry Smith         else low = t;
1317af674e45SBarry Smith       }
1318af674e45SBarry Smith       for (i = low; i < high; i++) {
1319af674e45SBarry Smith         if (rp[i] > col) break;
1320af674e45SBarry Smith         if (rp[i] == col) {
1321af674e45SBarry Smith           bap = ap + 16 * i;
1322af674e45SBarry Smith           for (ii = 0; ii < 4; ii++, value += stepval) {
1323ad540459SPierre Jolivet             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1324af674e45SBarry Smith           }
1325af674e45SBarry Smith           goto noinsert2;
1326af674e45SBarry Smith         }
1327af674e45SBarry Smith       }
1328af674e45SBarry Smith       N = nrow++ - 1;
132917ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
1330af674e45SBarry Smith       /* shift up all the later entries in this row */
1331af674e45SBarry Smith       for (ii = N; ii >= i; ii--) {
1332af674e45SBarry Smith         rp[ii + 1] = rp[ii];
13339566063dSJacob Faibussowitsch         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1334af674e45SBarry Smith       }
133548a46eb9SPierre Jolivet       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1336af674e45SBarry Smith       rp[i] = col;
1337af674e45SBarry Smith       bap   = ap + 16 * i;
1338af674e45SBarry Smith       for (ii = 0; ii < 4; ii++, value += stepval) {
1339ad540459SPierre Jolivet         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1340af674e45SBarry Smith       }
1341af674e45SBarry Smith     noinsert2:;
1342af674e45SBarry Smith       low = i;
1343af674e45SBarry Smith     }
1344af674e45SBarry Smith     ailen[row] = nrow;
1345af674e45SBarry Smith   }
1346be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1347af674e45SBarry Smith }
1348af674e45SBarry Smith 
1349af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1350af674e45SBarry Smith   #define matsetvalues4_ MATSETVALUES4
1351af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1352af674e45SBarry Smith   #define matsetvalues4_ matsetvalues4
1353af674e45SBarry Smith #endif
1354af674e45SBarry Smith 
1355d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1356d71ae5a4SJacob Faibussowitsch {
1357af674e45SBarry Smith   Mat          A = *AA;
1358af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1359580bdb30SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1360c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
1361c1ac3661SBarry Smith   PetscInt    *aj = a->j, brow, bcol;
136217ec6a02SBarry Smith   PetscInt     ridx, cidx, lastcol = -1;
1363af674e45SBarry Smith   MatScalar   *ap, value, *aa      = a->a, *bap;
1364af674e45SBarry Smith 
1365af674e45SBarry Smith   PetscFunctionBegin;
1366af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
13679371c9d4SSatish Balay     row  = im[k];
13689371c9d4SSatish Balay     brow = row / 4;
1369af674e45SBarry Smith     rp   = aj + ai[brow];
1370af674e45SBarry Smith     ap   = aa + 16 * ai[brow];
1371af674e45SBarry Smith     nrow = ailen[brow];
1372af674e45SBarry Smith     low  = 0;
137317ec6a02SBarry Smith     high = nrow;
1374af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
13759371c9d4SSatish Balay       col   = in[l];
13769371c9d4SSatish Balay       bcol  = col / 4;
13779371c9d4SSatish Balay       ridx  = row % 4;
13789371c9d4SSatish Balay       cidx  = col % 4;
1379af674e45SBarry Smith       value = v[l + k * n];
1380db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1381db4deed7SKarl Rupp       else high = nrow;
138217ec6a02SBarry Smith       lastcol = col;
1383af674e45SBarry Smith       while (high - low > 7) {
1384af674e45SBarry Smith         t = (low + high) / 2;
1385af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1386af674e45SBarry Smith         else low = t;
1387af674e45SBarry Smith       }
1388af674e45SBarry Smith       for (i = low; i < high; i++) {
1389af674e45SBarry Smith         if (rp[i] > bcol) break;
1390af674e45SBarry Smith         if (rp[i] == bcol) {
1391af674e45SBarry Smith           bap = ap + 16 * i + 4 * cidx + ridx;
1392af674e45SBarry Smith           *bap += value;
1393af674e45SBarry Smith           goto noinsert1;
1394af674e45SBarry Smith         }
1395af674e45SBarry Smith       }
1396af674e45SBarry Smith       N = nrow++ - 1;
139717ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1398af674e45SBarry Smith       /* shift up all the later entries in this row */
13999566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
14009566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
14019566063dSJacob Faibussowitsch       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1402af674e45SBarry Smith       rp[i]                        = bcol;
1403af674e45SBarry Smith       ap[16 * i + 4 * cidx + ridx] = value;
1404af674e45SBarry Smith     noinsert1:;
1405af674e45SBarry Smith       low = i;
1406af674e45SBarry Smith     }
1407af674e45SBarry Smith     ailen[brow] = nrow;
1408af674e45SBarry Smith   }
1409be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1410af674e45SBarry Smith }
1411af674e45SBarry Smith 
1412be5855fcSBarry Smith /*
1413be5855fcSBarry Smith      Checks for missing diagonals
1414be5855fcSBarry Smith */
1415d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1416d71ae5a4SJacob Faibussowitsch {
1417be5855fcSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14187734d3b5SMatthew G. Knepley   PetscInt    *diag, *ii = a->i, i;
1419be5855fcSBarry Smith 
1420be5855fcSBarry Smith   PetscFunctionBegin;
14219566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
14222af78befSBarry Smith   *missing = PETSC_FALSE;
14237734d3b5SMatthew G. Knepley   if (A->rmap->n > 0 && !ii) {
14242efa7f71SHong Zhang     *missing = PETSC_TRUE;
14252efa7f71SHong Zhang     if (d) *d = 0;
14269566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
14272efa7f71SHong Zhang   } else {
142801445905SHong Zhang     PetscInt n;
142901445905SHong Zhang     n    = PetscMin(a->mbs, a->nbs);
1430883fce79SBarry Smith     diag = a->diag;
143101445905SHong Zhang     for (i = 0; i < n; i++) {
14327734d3b5SMatthew G. Knepley       if (diag[i] >= ii[i + 1]) {
14332af78befSBarry Smith         *missing = PETSC_TRUE;
14342af78befSBarry Smith         if (d) *d = i;
14359566063dSJacob Faibussowitsch         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1436358d2f5dSShri Abhyankar         break;
14372efa7f71SHong Zhang       }
1438be5855fcSBarry Smith     }
1439be5855fcSBarry Smith   }
14403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1441be5855fcSBarry Smith }
1442be5855fcSBarry Smith 
1443d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1444d71ae5a4SJacob Faibussowitsch {
1445de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
144609f38230SBarry Smith   PetscInt     i, j, m = a->mbs;
1447de6a44a3SBarry Smith 
14483a40ed3dSBarry Smith   PetscFunctionBegin;
144909f38230SBarry Smith   if (!a->diag) {
14509566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(m, &a->diag));
14514fd072dbSBarry Smith     a->free_diag = PETSC_TRUE;
145209f38230SBarry Smith   }
14537fc0212eSBarry Smith   for (i = 0; i < m; i++) {
145409f38230SBarry Smith     a->diag[i] = a->i[i + 1];
1455de6a44a3SBarry Smith     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1456de6a44a3SBarry Smith       if (a->j[j] == i) {
145709f38230SBarry Smith         a->diag[i] = j;
1458de6a44a3SBarry Smith         break;
1459de6a44a3SBarry Smith       }
1460de6a44a3SBarry Smith     }
1461de6a44a3SBarry Smith   }
14623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1463de6a44a3SBarry Smith }
14642593348eSBarry Smith 
/*
   MatGetRowIJ_SeqBAIJ - returns row-pointer / column-index arrays describing the nonzero structure.

   Input:
     A               - the matrix
     oshift          - value given to the first row pointer (0 or 1)
     symmetric       - if true, the structure is first passed through MatToSymmetricIJ_SeqAIJ()
     blockcompressed - if true (or if bs == 1), one row per block row is returned; otherwise every
                       block row is expanded into bs scalar rows

   Output:
     nn   - number of rows described: a->mbs, or a->mbs*bs in the expanded case
     inia - row pointers; if NULL only *nn is set
     inja - column indices (optional)
     done - not written by this function

   Depending on the branch taken, the returned arrays are freshly allocated or are a->i / a->j themselves
   (non-symmetric, oshift != 1, not expanded).
*/
1465d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1466d71ae5a4SJacob Faibussowitsch {
14673b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14681a83f524SJed Brown   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
14691a83f524SJed Brown   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
14703b2fbd54SBarry Smith 
14713a40ed3dSBarry Smith   PetscFunctionBegin;
14723b2fbd54SBarry Smith   *nn = n;
14733ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
  /* tia/tja: block-level structure used below; newly created when symmetric, otherwise the matrix's own arrays */
14743b2fbd54SBarry Smith   if (symmetric) {
14759566063dSJacob Faibussowitsch     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1476553b3c51SBarry Smith     nz = tia[n];
14773b2fbd54SBarry Smith   } else {
14789371c9d4SSatish Balay     tia = a->i;
14799371c9d4SSatish Balay     tja = a->j;
14803b2fbd54SBarry Smith   }
14813b2fbd54SBarry Smith 
  /* expanded case: each block row becomes bs scalar rows, each block column becomes bs scalar columns */
1482ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1483ecc77c7aSBarry Smith     (*nn) *= bs;
14848f7157efSSatish Balay     /* malloc & create the natural set of indices */
    /* (n + 1) * bs entries are allocated; the loops below write indices 0 .. n * bs */
14859566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((n + 1) * bs, ia));
14869985e31cSBarry Smith     if (n) {
14872462f5fdSStefano Zampini       (*ia)[0] = oshift;
1488ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
14899985e31cSBarry Smith     }
1490ecc77c7aSBarry Smith 
    /* every entry is the previous one plus the scalar length (blocks in the block row times bs) of the previous scalar row */
1491ecc77c7aSBarry Smith     for (i = 1; i < n; i++) {
1492ecc77c7aSBarry Smith       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1493ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
14948f7157efSSatish Balay     }
1495ad540459SPierre Jolivet     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1496ecc77c7aSBarry Smith 
14971a83f524SJed Brown     if (inja) {
14989566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz * bs * bs, ja));
14999985e31cSBarry Smith       cnt = 0;
      /* NOTE(review): the column indices written here do not include oshift although (*ia)[0] starts at oshift; confirm this is intended */
15009985e31cSBarry Smith       for (i = 0; i < n; i++) {
15019985e31cSBarry Smith         for (j = 0; j < bs; j++) {
15029985e31cSBarry Smith           for (k = tia[i]; k < tia[i + 1]; k++) {
1503ad540459SPierre Jolivet             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
15049985e31cSBarry Smith           }
15059985e31cSBarry Smith         }
15069985e31cSBarry Smith       }
15079985e31cSBarry Smith     }
15089985e31cSBarry Smith 
15098f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
15109566063dSJacob Faibussowitsch       PetscCall(PetscFree(tia));
15119566063dSJacob Faibussowitsch       PetscCall(PetscFree(tja));
15128f7157efSSatish Balay     }
  /* block-level output with indices shifted by one */
1513f6d58c54SBarry Smith   } else if (oshift == 1) {
1514715a17b5SBarry Smith     if (symmetric) {
      /* tia/tja were created above, so they are shifted in place and handed to the caller */
1515a2ea699eSBarry Smith       nz = tia[A->rmap->n / bs];
1516715a17b5SBarry Smith       /*  add 1 to i and j indices */
1517715a17b5SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1518715a17b5SBarry Smith       *ia = tia;
1519715a17b5SBarry Smith       if (ja) {
1520715a17b5SBarry Smith         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1521715a17b5SBarry Smith         *ja = tja;
1522715a17b5SBarry Smith       }
1523715a17b5SBarry Smith     } else {
      /* the matrix's own arrays are left untouched; shifted copies are returned */
1524a2ea699eSBarry Smith       nz = a->i[A->rmap->n / bs];
1525f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
15269566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1527f6d58c54SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1528f6d58c54SBarry Smith       if (ja) {
15299566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(nz, ja));
1530f6d58c54SBarry Smith         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1531f6d58c54SBarry Smith       }
1532715a17b5SBarry Smith     }
15338f7157efSSatish Balay   } else {
    /* no shift, no expansion: return tia/tja directly (these are a->i / a->j unless symmetric) */
15348f7157efSSatish Balay     *ia = tia;
1535ecc77c7aSBarry Smith     if (ja) *ja = tja;
15368f7157efSSatish Balay   }
15373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15383b2fbd54SBarry Smith }
15393b2fbd54SBarry Smith 
1540d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1541d71ae5a4SJacob Faibussowitsch {
15423a40ed3dSBarry Smith   PetscFunctionBegin;
15433ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1544715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
15459566063dSJacob Faibussowitsch     PetscCall(PetscFree(*ia));
15469566063dSJacob Faibussowitsch     if (ja) PetscCall(PetscFree(*ja));
15473b2fbd54SBarry Smith   }
15483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15493b2fbd54SBarry Smith }
15503b2fbd54SBarry Smith 
/*
   MatDestroy_SeqBAIJ - releases everything held in the Mat_SeqBAIJ data structure of A, frees the
   structure itself, clears the type name and removes the functions composed on the object.
*/
1551d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1552d71ae5a4SJacob Faibussowitsch {
15532d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15542d61bbb3SSatish Balay 
1555433994e6SBarry Smith   PetscFunctionBegin;
  /* the matrix is still in hash-table mode: tear down the hash structures and leave that mode */
1556b4e2f619SBarry Smith   if (A->hash_active) {
1557b4e2f619SBarry Smith     PetscInt bs;
    /* overwrite the operations table with a->cops (presumably the table saved when hash mode was entered; confirm) */
1558e3c72094SPierre Jolivet     A->ops[0] = a->cops;
1559b4e2f619SBarry Smith     PetscCall(PetscHMapIJVDestroy(&a->ht));
1560b4e2f619SBarry Smith     PetscCall(MatGetBlockSize(A, &bs));
    /* the block hash set is only destroyed when bs > 1 */
1561b4e2f619SBarry Smith     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1562b4e2f619SBarry Smith     PetscCall(PetscFree(a->dnz));
1563b4e2f619SBarry Smith     PetscCall(PetscFree(a->bdnz));
1564b4e2f619SBarry Smith     A->hash_active = PETSC_FALSE;
1565b4e2f619SBarry Smith   }
15663ba16761SJacob Faibussowitsch   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
  /* values, column indices and row pointers */
15679566063dSJacob Faibussowitsch   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
15689566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
15699566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
  /* diag and imax/ilen are freed only when the matching free_* flag is set */
15709566063dSJacob Faibussowitsch   if (a->free_diag) PetscCall(PetscFree(a->diag));
15719566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->idiag));
15729566063dSJacob Faibussowitsch   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
  /* work arrays */
15739566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->solve_work));
15749566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->mult_work));
15759566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_workt));
15769566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_work));
15779566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
15789566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->saved_values));
15799566063dSJacob Faibussowitsch   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1580c4319e64SHong Zhang 
  /* matrices referenced from the data structure */
15819566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->sbaijMat));
15829566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->parent));
  /* a must not be dereferenced after this point */
15839566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1584901853e0SKris Buschelman 
  /* clear the type name and remove each composed function by composing NULL under its name */
15859566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
15869566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
15879566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
15889566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
15899566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
15909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
15919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
15929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
15939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
15949566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
15959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
15969566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
15977ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
15989566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
15997ea3e4caSstefano_zampini #endif
16009566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
16012e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
16023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16032d61bbb3SSatish Balay }
16042d61bbb3SSatish Balay 
160566976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1606d71ae5a4SJacob Faibussowitsch {
16072d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
16082d61bbb3SSatish Balay 
16092d61bbb3SSatish Balay   PetscFunctionBegin;
1610aa275fccSKris Buschelman   switch (op) {
1611d71ae5a4SJacob Faibussowitsch   case MAT_ROW_ORIENTED:
1612d71ae5a4SJacob Faibussowitsch     a->roworiented = flg;
1613d71ae5a4SJacob Faibussowitsch     break;
1614d71ae5a4SJacob Faibussowitsch   case MAT_KEEP_NONZERO_PATTERN:
1615d71ae5a4SJacob Faibussowitsch     a->keepnonzeropattern = flg;
1616d71ae5a4SJacob Faibussowitsch     break;
1617d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATIONS:
1618d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? 0 : 1);
1619d71ae5a4SJacob Faibussowitsch     break;
1620d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATION_ERR:
1621d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -1 : 0);
1622d71ae5a4SJacob Faibussowitsch     break;
1623d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1624d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -2 : 0);
1625d71ae5a4SJacob Faibussowitsch     break;
1626d71ae5a4SJacob Faibussowitsch   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1627d71ae5a4SJacob Faibussowitsch     a->nounused = (flg ? -1 : 0);
1628d71ae5a4SJacob Faibussowitsch     break;
16298c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1630aa275fccSKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
1631aa275fccSKris Buschelman   case MAT_USE_HASH_TABLE:
1632d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
1633d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1634d71ae5a4SJacob Faibussowitsch     break;
16355021d80fSJed Brown   case MAT_SPD:
163677e54ba9SKris Buschelman   case MAT_SYMMETRIC:
163777e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
16389a4540c5SBarry Smith   case MAT_HERMITIAN:
16399a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1640b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1641c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1642672ba085SHong Zhang   case MAT_STRUCTURE_ONLY:
1643b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
1644b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
164577e54ba9SKris Buschelman     break;
1646d71ae5a4SJacob Faibussowitsch   default:
1647d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
16482d61bbb3SSatish Balay   }
16493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16502d61bbb3SSatish Balay }
16512d61bbb3SSatish Balay 
165252768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
1653d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1654d71ae5a4SJacob Faibussowitsch {
165552768537SHong Zhang   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
165652768537SHong Zhang   MatScalar   *aa_i;
165787828ca2SBarry Smith   PetscScalar *v_i;
16582d61bbb3SSatish Balay 
16592d61bbb3SSatish Balay   PetscFunctionBegin;
1660d0f46423SBarry Smith   bs  = A->rmap->bs;
166152768537SHong Zhang   bs2 = bs * bs;
16625f80ce2aSJacob Faibussowitsch   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
16632d61bbb3SSatish Balay 
16642d61bbb3SSatish Balay   bn  = row / bs; /* Block number */
16652d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
16662d61bbb3SSatish Balay   M   = ai[bn + 1] - ai[bn];
16672d61bbb3SSatish Balay   *nz = bs * M;
16682d61bbb3SSatish Balay 
16692d61bbb3SSatish Balay   if (v) {
1670f4259b30SLisandro Dalcin     *v = NULL;
16712d61bbb3SSatish Balay     if (*nz) {
16729566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, v));
16732d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16742d61bbb3SSatish Balay         v_i  = *v + i * bs;
16752d61bbb3SSatish Balay         aa_i = aa + bs2 * (ai[bn] + i);
167626fbe8dcSKarl Rupp         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
16772d61bbb3SSatish Balay       }
16782d61bbb3SSatish Balay     }
16792d61bbb3SSatish Balay   }
16802d61bbb3SSatish Balay 
16812d61bbb3SSatish Balay   if (idx) {
1682f4259b30SLisandro Dalcin     *idx = NULL;
16832d61bbb3SSatish Balay     if (*nz) {
16849566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, idx));
16852d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16862d61bbb3SSatish Balay         idx_i = *idx + i * bs;
16872d61bbb3SSatish Balay         itmp  = bs * aj[ai[bn] + i];
168826fbe8dcSKarl Rupp         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
16892d61bbb3SSatish Balay       }
16902d61bbb3SSatish Balay     }
16912d61bbb3SSatish Balay   }
16923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16932d61bbb3SSatish Balay }
16942d61bbb3SSatish Balay 
1695d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1696d71ae5a4SJacob Faibussowitsch {
169752768537SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
169852768537SHong Zhang 
169952768537SHong Zhang   PetscFunctionBegin;
17009566063dSJacob Faibussowitsch   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
17013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
170252768537SHong Zhang }
170352768537SHong Zhang 
1704d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1705d71ae5a4SJacob Faibussowitsch {
17062d61bbb3SSatish Balay   PetscFunctionBegin;
17079566063dSJacob Faibussowitsch   if (idx) PetscCall(PetscFree(*idx));
17089566063dSJacob Faibussowitsch   if (v) PetscCall(PetscFree(*v));
17093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17102d61bbb3SSatish Balay }
17112d61bbb3SSatish Balay 
171266976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1713d71ae5a4SJacob Faibussowitsch {
171420e84f26SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
17152d61bbb3SSatish Balay   Mat          C;
171620e84f26SHong Zhang   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
171720e84f26SHong Zhang   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
171820e84f26SHong Zhang   MatScalar   *ata, *aa = a->a;
17192d61bbb3SSatish Balay 
17202d61bbb3SSatish Balay   PetscFunctionBegin;
17217fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
17229566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1723cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
172420e84f26SHong Zhang     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
17252d61bbb3SSatish Balay 
17269566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
17279566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
17289566063dSJacob Faibussowitsch     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
17299566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
173020e84f26SHong Zhang 
173120e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
173220e84f26SHong Zhang     ati = at->i;
173320e84f26SHong Zhang     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1734fc4dec0aSBarry Smith   } else {
1735fc4dec0aSBarry Smith     C   = *B;
173620e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
173720e84f26SHong Zhang     ati = at->i;
1738fc4dec0aSBarry Smith   }
1739fc4dec0aSBarry Smith 
174020e84f26SHong Zhang   atj = at->j;
174120e84f26SHong Zhang   ata = at->a;
174220e84f26SHong Zhang 
174320e84f26SHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
17449566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(atfill, ati, nbs));
174520e84f26SHong Zhang 
174620e84f26SHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
17472d61bbb3SSatish Balay   for (i = 0; i < mbs; i++) {
174820e84f26SHong Zhang     anzj = ai[i + 1] - ai[i];
174920e84f26SHong Zhang     for (j = 0; j < anzj; j++) {
175020e84f26SHong Zhang       atj[atfill[*aj]] = i;
175120e84f26SHong Zhang       for (kr = 0; kr < bs; kr++) {
1752ad540459SPierre Jolivet         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
17532d61bbb3SSatish Balay       }
175420e84f26SHong Zhang       atfill[*aj++] += 1;
175520e84f26SHong Zhang     }
175620e84f26SHong Zhang   }
17579566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
17589566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
17592d61bbb3SSatish Balay 
176020e84f26SHong Zhang   /* Clean up temporary space and complete requests. */
17619566063dSJacob Faibussowitsch   PetscCall(PetscFree(atfill));
176220e84f26SHong Zhang 
1763cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
17649566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
17652d61bbb3SSatish Balay     *B = C;
17662d61bbb3SSatish Balay   } else {
17679566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &C));
17682d61bbb3SSatish Balay   }
17693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17702d61bbb3SSatish Balay }
17712d61bbb3SSatish Balay 
1772ff6a9541SJacob Faibussowitsch static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1773d71ae5a4SJacob Faibussowitsch {
1774453d3561SHong Zhang   Mat Btrans;
1775453d3561SHong Zhang 
1776453d3561SHong Zhang   PetscFunctionBegin;
1777453d3561SHong Zhang   *f = PETSC_FALSE;
1778acd337a6SBarry Smith   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
17799566063dSJacob Faibussowitsch   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
17809566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Btrans));
17813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1782453d3561SHong Zhang }
1783453d3561SHong Zhang 
1784618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1785d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1786d71ae5a4SJacob Faibussowitsch {
1787b51a4376SLisandro Dalcin   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1788b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1789b51a4376SLisandro Dalcin   PetscInt    *rowlens, *colidxs;
1790b51a4376SLisandro Dalcin   PetscScalar *matvals;
17912593348eSBarry Smith 
17923a40ed3dSBarry Smith   PetscFunctionBegin;
17939566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
17943b2fbd54SBarry Smith 
1795b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1796b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1797b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1798b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1799b51a4376SLisandro Dalcin   nz = bs * bs * A->nz;
18002593348eSBarry Smith 
1801b51a4376SLisandro Dalcin   /* write matrix header */
1802b51a4376SLisandro Dalcin   header[0] = MAT_FILE_CLASSID;
18039371c9d4SSatish Balay   header[1] = M;
18049371c9d4SSatish Balay   header[2] = N;
18059371c9d4SSatish Balay   header[3] = nz;
18069566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
18072593348eSBarry Smith 
1808b51a4376SLisandro Dalcin   /* store row lengths */
18099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rowlens));
1810b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
18119371c9d4SSatish Balay     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
18129566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
18139566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
1814b51a4376SLisandro Dalcin 
1815b51a4376SLisandro Dalcin   /* store column indices  */
18169566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &colidxs));
1817b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1818b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1819b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
18209371c9d4SSatish Balay         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
18215f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
18229566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
18239566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
18242593348eSBarry Smith 
18252593348eSBarry Smith   /* store nonzero values */
18269566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &matvals));
1827b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1828b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1829b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
18309371c9d4SSatish Balay         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
18315f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
18329566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
18339566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
1834ce6f0cecSBarry Smith 
1835b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
18369566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
18373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18382593348eSBarry Smith }
18392593348eSBarry Smith 
1840d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1841d71ae5a4SJacob Faibussowitsch {
18427dc0baabSHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
18437dc0baabSHong Zhang   PetscInt     i, bs = A->rmap->bs, k;
18447dc0baabSHong Zhang 
18457dc0baabSHong Zhang   PetscFunctionBegin;
18469566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
18477dc0baabSHong Zhang   for (i = 0; i < a->mbs; i++) {
18489566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
184948a46eb9SPierre Jolivet     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
18509566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18517dc0baabSHong Zhang   }
18529566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18547dc0baabSHong Zhang }
18557dc0baabSHong Zhang 
1856d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1857d71ae5a4SJacob Faibussowitsch {
1858b6490206SBarry Smith   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1859d0f46423SBarry Smith   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1860f3ef73ceSBarry Smith   PetscViewerFormat format;
18612593348eSBarry Smith 
18623a40ed3dSBarry Smith   PetscFunctionBegin;
18637dc0baabSHong Zhang   if (A->structure_only) {
18649566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
18653ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
18667dc0baabSHong Zhang   }
18677dc0baabSHong Zhang 
18689566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
1869456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
18709566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1871fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1872ade3a672SBarry Smith     const char *matname;
1873bcd9e38bSBarry Smith     Mat         aij;
18749566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
18759566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
18769566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
18779566063dSJacob Faibussowitsch     PetscCall(MatView(aij, viewer));
18789566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&aij));
187904929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
18803ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1881fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
18829566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
188344cd7ae7SLois Curfman McInnes     for (i = 0; i < a->mbs; i++) {
188444cd7ae7SLois Curfman McInnes       for (j = 0; j < bs; j++) {
18859566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
188644cd7ae7SLois Curfman McInnes         for (k = a->i[i]; k < a->i[i + 1]; k++) {
188744cd7ae7SLois Curfman McInnes           for (l = 0; l < bs; l++) {
1888aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18890e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18909371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18910e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18929371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18930e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18949566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
18950ef38995SBarry Smith             }
189644cd7ae7SLois Curfman McInnes #else
189748a46eb9SPierre Jolivet             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
189844cd7ae7SLois Curfman McInnes #endif
189944cd7ae7SLois Curfman McInnes           }
190044cd7ae7SLois Curfman McInnes         }
19019566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
190244cd7ae7SLois Curfman McInnes       }
190344cd7ae7SLois Curfman McInnes     }
19049566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
19050ef38995SBarry Smith   } else {
19069566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1907b6490206SBarry Smith     for (i = 0; i < a->mbs; i++) {
1908b6490206SBarry Smith       for (j = 0; j < bs; j++) {
19099566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1910b6490206SBarry Smith         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1911b6490206SBarry Smith           for (l = 0; l < bs; l++) {
1912aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
19130e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
19149371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
19150e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
19169371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
19170ef38995SBarry Smith             } else {
19189566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
191988685aaeSLois Curfman McInnes             }
192088685aaeSLois Curfman McInnes #else
19219566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
192288685aaeSLois Curfman McInnes #endif
19232593348eSBarry Smith           }
19242593348eSBarry Smith         }
19259566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
19262593348eSBarry Smith       }
19272593348eSBarry Smith     }
19289566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1929b6490206SBarry Smith   }
19309566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
19313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19322593348eSBarry Smith }
19332593348eSBarry Smith 
19349804daf3SBarry Smith #include <petscdraw.h>
1935d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1936d71ae5a4SJacob Faibussowitsch {
193777ed5343SBarry Smith   Mat               A = (Mat)Aa;
19383270192aSSatish Balay   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1939*6497c311SBarry Smith   PetscInt          row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
19400e6d2581SBarry Smith   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
19413f1db9ecSBarry Smith   MatScalar        *aa;
1942b0a32e0cSBarry Smith   PetscViewer       viewer;
1943b3e7f47fSJed Brown   PetscViewerFormat format;
1944*6497c311SBarry Smith   int               color;
19453270192aSSatish Balay 
19463a40ed3dSBarry Smith   PetscFunctionBegin;
19479566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
19489566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
19499566063dSJacob Faibussowitsch   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
195077ed5343SBarry Smith 
19513270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1952b3e7f47fSJed Brown 
1953b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1954d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1955383922c3SLisandro Dalcin     /* Blue for negative, Cyan for zero and  Red for positive */
1956b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
19573270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19583270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19599371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19609371c9d4SSatish Balay         y_r = y_l + 1.0;
19619371c9d4SSatish Balay         x_l = a->j[j] * bs;
19629371c9d4SSatish Balay         x_r = x_l + 1.0;
19633270192aSSatish Balay         aa  = a->a + j * bs2;
19643270192aSSatish Balay         for (k = 0; k < bs; k++) {
19653270192aSSatish Balay           for (l = 0; l < bs; l++) {
19660e6d2581SBarry Smith             if (PetscRealPart(*aa++) >= 0.) continue;
19679566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19683270192aSSatish Balay           }
19693270192aSSatish Balay         }
19703270192aSSatish Balay       }
19713270192aSSatish Balay     }
1972b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
19733270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19743270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19759371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19769371c9d4SSatish Balay         y_r = y_l + 1.0;
19779371c9d4SSatish Balay         x_l = a->j[j] * bs;
19789371c9d4SSatish Balay         x_r = x_l + 1.0;
19793270192aSSatish Balay         aa  = a->a + j * bs2;
19803270192aSSatish Balay         for (k = 0; k < bs; k++) {
19813270192aSSatish Balay           for (l = 0; l < bs; l++) {
19820e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
19839566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19843270192aSSatish Balay           }
19853270192aSSatish Balay         }
19863270192aSSatish Balay       }
19873270192aSSatish Balay     }
1988b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
19893270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19903270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19919371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19929371c9d4SSatish Balay         y_r = y_l + 1.0;
19939371c9d4SSatish Balay         x_l = a->j[j] * bs;
19949371c9d4SSatish Balay         x_r = x_l + 1.0;
19953270192aSSatish Balay         aa  = a->a + j * bs2;
19963270192aSSatish Balay         for (k = 0; k < bs; k++) {
19973270192aSSatish Balay           for (l = 0; l < bs; l++) {
19980e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
19999566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
20003270192aSSatish Balay           }
20013270192aSSatish Balay         }
20023270192aSSatish Balay       }
20033270192aSSatish Balay     }
2004d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
2005b3e7f47fSJed Brown   } else {
2006b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
2007b3e7f47fSJed Brown     /* first determine max of all nonzero values */
2008b05fc000SLisandro Dalcin     PetscReal minv = 0.0, maxv = 0.0;
2009b3e7f47fSJed Brown     PetscDraw popup;
2010b3e7f47fSJed Brown 
2011b3e7f47fSJed Brown     for (i = 0; i < a->nz * a->bs2; i++) {
2012b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
2013b3e7f47fSJed Brown     }
2014383922c3SLisandro Dalcin     if (minv >= maxv) maxv = minv + PETSC_SMALL;
20159566063dSJacob Faibussowitsch     PetscCall(PetscDrawGetPopup(draw, &popup));
20169566063dSJacob Faibussowitsch     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
2017383922c3SLisandro Dalcin 
2018d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
2019b3e7f47fSJed Brown     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2020b3e7f47fSJed Brown       for (j = a->i[i]; j < a->i[i + 1]; j++) {
20219371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
20229371c9d4SSatish Balay         y_r = y_l + 1.0;
20239371c9d4SSatish Balay         x_l = a->j[j] * bs;
20249371c9d4SSatish Balay         x_r = x_l + 1.0;
2025b3e7f47fSJed Brown         aa  = a->a + j * bs2;
2026b3e7f47fSJed Brown         for (k = 0; k < bs; k++) {
2027b3e7f47fSJed Brown           for (l = 0; l < bs; l++) {
2028383922c3SLisandro Dalcin             MatScalar v = *aa++;
2029383922c3SLisandro Dalcin             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
20309566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2031b3e7f47fSJed Brown           }
2032b3e7f47fSJed Brown         }
2033b3e7f47fSJed Brown       }
2034b3e7f47fSJed Brown     }
2035d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
2036b3e7f47fSJed Brown   }
20373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
203877ed5343SBarry Smith }
20393270192aSSatish Balay 
2040d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2041d71ae5a4SJacob Faibussowitsch {
20420e6d2581SBarry Smith   PetscReal xl, yl, xr, yr, w, h;
2043b0a32e0cSBarry Smith   PetscDraw draw;
2044ace3abfcSBarry Smith   PetscBool isnull;
20453270192aSSatish Balay 
204677ed5343SBarry Smith   PetscFunctionBegin;
20479566063dSJacob Faibussowitsch   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
20489566063dSJacob Faibussowitsch   PetscCall(PetscDrawIsNull(draw, &isnull));
20493ba16761SJacob Faibussowitsch   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
205077ed5343SBarry Smith 
20519371c9d4SSatish Balay   xr = A->cmap->n;
20529371c9d4SSatish Balay   yr = A->rmap->N;
20539371c9d4SSatish Balay   h  = yr / 10.0;
20549371c9d4SSatish Balay   w  = xr / 10.0;
20559371c9d4SSatish Balay   xr += w;
20569371c9d4SSatish Balay   yr += h;
20579371c9d4SSatish Balay   xl = -w;
20589371c9d4SSatish Balay   yl = -h;
20599566063dSJacob Faibussowitsch   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
20609566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
20619566063dSJacob Faibussowitsch   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
20629566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
20639566063dSJacob Faibussowitsch   PetscCall(PetscDrawSave(draw));
20643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20653270192aSSatish Balay }
20663270192aSSatish Balay 
2067d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2068d71ae5a4SJacob Faibussowitsch {
2069ace3abfcSBarry Smith   PetscBool iascii, isbinary, isdraw;
20702593348eSBarry Smith 
20713a40ed3dSBarry Smith   PetscFunctionBegin;
20729566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
20739566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
20749566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
207532077d6dSBarry Smith   if (iascii) {
20769566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
20770f5bd95cSBarry Smith   } else if (isbinary) {
20789566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
20790f5bd95cSBarry Smith   } else if (isdraw) {
20809566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
20815cd90555SBarry Smith   } else {
2082a5e6ed63SBarry Smith     Mat B;
20839566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
20849566063dSJacob Faibussowitsch     PetscCall(MatView(B, viewer));
20859566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
20862593348eSBarry Smith   }
20873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20882593348eSBarry Smith }
2089b6490206SBarry Smith 
2090d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2091d71ae5a4SJacob Faibussowitsch {
2092cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2093c1ac3661SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2094c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
2095d0f46423SBarry Smith   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
209697e567efSBarry Smith   MatScalar   *ap, *aa = a->a;
2097cd0e1443SSatish Balay 
20983a40ed3dSBarry Smith   PetscFunctionBegin;
20992d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over rows */
21009371c9d4SSatish Balay     row  = im[k];
21019371c9d4SSatish Balay     brow = row / bs;
21029371c9d4SSatish Balay     if (row < 0) {
21039371c9d4SSatish Balay       v += n;
21049371c9d4SSatish Balay       continue;
21059371c9d4SSatish Balay     } /* negative row */
210654c59aa7SJacob Faibussowitsch     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
21078e3a54c0SPierre Jolivet     rp   = PetscSafePointerPlusOffset(aj, ai[brow]);
21088e3a54c0SPierre Jolivet     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
21092c3acbe9SBarry Smith     nrow = ailen[brow];
21102d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over columns */
21119371c9d4SSatish Balay       if (in[l] < 0) {
21129371c9d4SSatish Balay         v++;
21139371c9d4SSatish Balay         continue;
21149371c9d4SSatish Balay       } /* negative column */
211554c59aa7SJacob Faibussowitsch       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
21162d61bbb3SSatish Balay       col  = in[l];
21172d61bbb3SSatish Balay       bcol = col / bs;
21182d61bbb3SSatish Balay       cidx = col % bs;
21192d61bbb3SSatish Balay       ridx = row % bs;
21202d61bbb3SSatish Balay       high = nrow;
21212d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
21222d61bbb3SSatish Balay       while (high - low > 5) {
2123cd0e1443SSatish Balay         t = (low + high) / 2;
2124cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
2125cd0e1443SSatish Balay         else low = t;
2126cd0e1443SSatish Balay       }
2127cd0e1443SSatish Balay       for (i = low; i < high; i++) {
2128cd0e1443SSatish Balay         if (rp[i] > bcol) break;
2129cd0e1443SSatish Balay         if (rp[i] == bcol) {
21302d61bbb3SSatish Balay           *v++ = ap[bs2 * i + bs * cidx + ridx];
21312d61bbb3SSatish Balay           goto finished;
2132cd0e1443SSatish Balay         }
2133cd0e1443SSatish Balay       }
213497e567efSBarry Smith       *v++ = 0.0;
21352d61bbb3SSatish Balay     finished:;
2136cd0e1443SSatish Balay     }
2137cd0e1443SSatish Balay   }
21383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2139cd0e1443SSatish Balay }
2140cd0e1443SSatish Balay 
2141d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2142d71ae5a4SJacob Faibussowitsch {
214392c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2144e2ee6c50SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2145c1ac3661SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2146d0f46423SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2147ace3abfcSBarry Smith   PetscBool          roworiented = a->roworiented;
2148dd6ea824SBarry Smith   const PetscScalar *value       = v;
21499d243f67SHong Zhang   MatScalar         *ap = NULL, *aa = a->a, *bap;
215092c4ed94SBarry Smith 
21513a40ed3dSBarry Smith   PetscFunctionBegin;
21520e324ae4SSatish Balay   if (roworiented) {
21530e324ae4SSatish Balay     stepval = (n - 1) * bs;
21540e324ae4SSatish Balay   } else {
21550e324ae4SSatish Balay     stepval = (m - 1) * bs;
21560e324ae4SSatish Balay   }
215792c4ed94SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
215892c4ed94SBarry Smith     row = im[k];
21595ef9f2a5SBarry Smith     if (row < 0) continue;
21606bdcaf15SBarry Smith     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
216192c4ed94SBarry Smith     rp = aj + ai[row];
21627dc0baabSHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[row];
216392c4ed94SBarry Smith     rmax = imax[row];
216492c4ed94SBarry Smith     nrow = ailen[row];
216592c4ed94SBarry Smith     low  = 0;
2166c71e6ed7SBarry Smith     high = nrow;
216792c4ed94SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
21685ef9f2a5SBarry Smith       if (in[l] < 0) continue;
21696bdcaf15SBarry Smith       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
217092c4ed94SBarry Smith       col = in[l];
21717dc0baabSHong Zhang       if (!A->structure_only) {
217292c4ed94SBarry Smith         if (roworiented) {
217353ef36baSBarry Smith           value = v + (k * (stepval + bs) + l) * bs;
21740e324ae4SSatish Balay         } else {
217553ef36baSBarry Smith           value = v + (l * (stepval + bs) + k) * bs;
217692c4ed94SBarry Smith         }
21777dc0baabSHong Zhang       }
217826fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
217926fbe8dcSKarl Rupp       else high = nrow;
2180e2ee6c50SBarry Smith       lastcol = col;
218192c4ed94SBarry Smith       while (high - low > 7) {
218292c4ed94SBarry Smith         t = (low + high) / 2;
218392c4ed94SBarry Smith         if (rp[t] > col) high = t;
218492c4ed94SBarry Smith         else low = t;
218592c4ed94SBarry Smith       }
218692c4ed94SBarry Smith       for (i = low; i < high; i++) {
218792c4ed94SBarry Smith         if (rp[i] > col) break;
218892c4ed94SBarry Smith         if (rp[i] == col) {
21897dc0baabSHong Zhang           if (A->structure_only) goto noinsert2;
21908a84c255SSatish Balay           bap = ap + bs2 * i;
21910e324ae4SSatish Balay           if (roworiented) {
21928a84c255SSatish Balay             if (is == ADD_VALUES) {
2193dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2194ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2195dd9472c6SBarry Smith               }
21960e324ae4SSatish Balay             } else {
2197dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2198ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2199dd9472c6SBarry Smith               }
2200dd9472c6SBarry Smith             }
22010e324ae4SSatish Balay           } else {
22020e324ae4SSatish Balay             if (is == ADD_VALUES) {
220353ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2204ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
220553ef36baSBarry Smith                 bap += bs;
2206dd9472c6SBarry Smith               }
22070e324ae4SSatish Balay             } else {
220853ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2209ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
221053ef36baSBarry Smith                 bap += bs;
22118a84c255SSatish Balay               }
2212dd9472c6SBarry Smith             }
2213dd9472c6SBarry Smith           }
2214f1241b54SBarry Smith           goto noinsert2;
221592c4ed94SBarry Smith         }
221692c4ed94SBarry Smith       }
221789280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
22185f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
22197dc0baabSHong Zhang       if (A->structure_only) {
22207dc0baabSHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
22217dc0baabSHong Zhang       } else {
2222fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
22237dc0baabSHong Zhang       }
22249371c9d4SSatish Balay       N = nrow++ - 1;
22259371c9d4SSatish Balay       high++;
222692c4ed94SBarry Smith       /* shift up all the later entries in this row */
22279566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
222892c4ed94SBarry Smith       rp[i] = col;
22297dc0baabSHong Zhang       if (!A->structure_only) {
22309566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
22318a84c255SSatish Balay         bap = ap + bs2 * i;
22320e324ae4SSatish Balay         if (roworiented) {
2233dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2234ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2235dd9472c6SBarry Smith           }
22360e324ae4SSatish Balay         } else {
2237dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2238ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2239dd9472c6SBarry Smith           }
2240dd9472c6SBarry Smith         }
22417dc0baabSHong Zhang       }
2242f1241b54SBarry Smith     noinsert2:;
224392c4ed94SBarry Smith       low = i;
224492c4ed94SBarry Smith     }
224592c4ed94SBarry Smith     ailen[row] = nrow;
224692c4ed94SBarry Smith   }
22473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
224892c4ed94SBarry Smith }
224926e093fcSHong Zhang 
2250d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2251d71ae5a4SJacob Faibussowitsch {
2252584200bdSSatish Balay   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2253580bdb30SBarry Smith   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2254d0f46423SBarry Smith   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2255c1ac3661SBarry Smith   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
22563f1db9ecSBarry Smith   MatScalar   *aa    = a->a, *ap;
22573447b6efSHong Zhang   PetscReal    ratio = 0.6;
2258584200bdSSatish Balay 
22593a40ed3dSBarry Smith   PetscFunctionBegin;
2260d32568d8SPierre Jolivet   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2261584200bdSSatish Balay 
226243ee02c3SBarry Smith   if (m) rmax = ailen[0];
2263584200bdSSatish Balay   for (i = 1; i < mbs; i++) {
2264584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
2265584200bdSSatish Balay     fshift += imax[i - 1] - ailen[i - 1];
2266d402145bSBarry Smith     rmax = PetscMax(rmax, ailen[i]);
2267584200bdSSatish Balay     if (fshift) {
2268580bdb30SBarry Smith       ip = aj + ai[i];
2269580bdb30SBarry Smith       ap = aa + bs2 * ai[i];
2270584200bdSSatish Balay       N  = ailen[i];
22719566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(ip - fshift, ip, N));
227248a46eb9SPierre Jolivet       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2273672ba085SHong Zhang     }
2274584200bdSSatish Balay     ai[i] = ai[i - 1] + ailen[i - 1];
2275584200bdSSatish Balay   }
2276584200bdSSatish Balay   if (mbs) {
2277584200bdSSatish Balay     fshift += imax[mbs - 1] - ailen[mbs - 1];
2278584200bdSSatish Balay     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2279584200bdSSatish Balay   }
22807c565772SBarry Smith 
2281584200bdSSatish Balay   /* reset ilen and imax for each row */
22827c565772SBarry Smith   a->nonzerorowcnt = 0;
2283672ba085SHong Zhang   if (A->structure_only) {
22849566063dSJacob Faibussowitsch     PetscCall(PetscFree2(a->imax, a->ilen));
2285672ba085SHong Zhang   } else { /* !A->structure_only */
2286584200bdSSatish Balay     for (i = 0; i < mbs; i++) {
2287584200bdSSatish Balay       ailen[i] = imax[i] = ai[i + 1] - ai[i];
22887c565772SBarry Smith       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2289584200bdSSatish Balay     }
2290672ba085SHong Zhang   }
2291a7c10996SSatish Balay   a->nz = ai[mbs];
2292584200bdSSatish Balay 
2293584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
2294b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
2295ff6a9541SJacob Faibussowitsch   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
22965f80ce2aSJacob Faibussowitsch   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
22979566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
22989566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
22999566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
230026fbe8dcSKarl Rupp 
23018e58a170SBarry Smith   A->info.mallocs += a->reallocs;
2302e2f3b5e9SSatish Balay   a->reallocs         = 0;
23030e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2304647a6520SHong Zhang   a->rmax             = rmax;
2305cf4441caSHong Zhang 
230648a46eb9SPierre Jolivet   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
23073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2308584200bdSSatish Balay }
2309584200bdSSatish Balay 
2310bea157c4SSatish Balay /*
2311bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
2312bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2313a5b23f4aSJose E. Roman    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2314bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
2315bea157c4SSatish Balay */
2316d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2317d71ae5a4SJacob Faibussowitsch {
2318ff6a9541SJacob Faibussowitsch   PetscInt j = 0;
23193a40ed3dSBarry Smith 
2320433994e6SBarry Smith   PetscFunctionBegin;
2321ff6a9541SJacob Faibussowitsch   for (PetscInt i = 0; i < n; j++) {
2322ff6a9541SJacob Faibussowitsch     PetscInt row = idx[i];
2323a5b23f4aSJose E. Roman     if (row % bs != 0) { /* Not the beginning of a block */
2324bea157c4SSatish Balay       sizes[j] = 1;
2325bea157c4SSatish Balay       i++;
2326e4fda26cSSatish Balay     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2327bea157c4SSatish Balay       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2328bea157c4SSatish Balay       i++;
23296aad120cSJose E. Roman     } else { /* Beginning of the block, so check if the complete block exists */
2330ff6a9541SJacob Faibussowitsch       PetscBool flg = PETSC_TRUE;
2331ff6a9541SJacob Faibussowitsch       for (PetscInt k = 1; k < bs; k++) {
2332bea157c4SSatish Balay         if (row + k != idx[i + k]) { /* break in the block */
2333bea157c4SSatish Balay           flg = PETSC_FALSE;
2334bea157c4SSatish Balay           break;
2335d9b7c43dSSatish Balay         }
2336bea157c4SSatish Balay       }
2337abc0a331SBarry Smith       if (flg) { /* No break in the bs */
2338bea157c4SSatish Balay         sizes[j] = bs;
2339bea157c4SSatish Balay         i += bs;
2340bea157c4SSatish Balay       } else {
2341bea157c4SSatish Balay         sizes[j] = 1;
2342bea157c4SSatish Balay         i++;
2343bea157c4SSatish Balay       }
2344bea157c4SSatish Balay     }
2345bea157c4SSatish Balay   }
2346bea157c4SSatish Balay   *bs_max = j;
23473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2348d9b7c43dSSatish Balay }
2349d9b7c43dSSatish Balay 
2350d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2351d71ae5a4SJacob Faibussowitsch {
2352d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2353f4df32b1SMatthew Knepley   PetscInt           i, j, k, count, *rows;
2354d0f46423SBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
235587828ca2SBarry Smith   PetscScalar        zero = 0.0;
23563f1db9ecSBarry Smith   MatScalar         *aa;
235797b48c8fSBarry Smith   const PetscScalar *xx;
235897b48c8fSBarry Smith   PetscScalar       *bb;
2359d9b7c43dSSatish Balay 
23603a40ed3dSBarry Smith   PetscFunctionBegin;
2361dd8e379bSPierre Jolivet   /* fix right-hand side if needed */
236297b48c8fSBarry Smith   if (x && b) {
23639566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
23649566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
2365ad540459SPierre Jolivet     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
23669566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
23679566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
236897b48c8fSBarry Smith   }
236997b48c8fSBarry Smith 
2370d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2371bea157c4SSatish Balay   /* allocate memory for rows,sizes */
23729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2373bea157c4SSatish Balay 
2374563b5814SBarry Smith   /* copy IS values to rows, and sort them */
237526fbe8dcSKarl Rupp   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
23769566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(is_n, rows));
237797b48c8fSBarry Smith 
2378a9817697SBarry Smith   if (baij->keepnonzeropattern) {
237926fbe8dcSKarl Rupp     for (i = 0; i < is_n; i++) sizes[i] = 1;
2380dffd3267SBarry Smith     bs_max = is_n;
2381dffd3267SBarry Smith   } else {
23829566063dSJacob Faibussowitsch     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2383e56f5c9eSBarry Smith     A->nonzerostate++;
2384dffd3267SBarry Smith   }
2385bea157c4SSatish Balay 
2386bea157c4SSatish Balay   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2387bea157c4SSatish Balay     row = rows[j];
23885f80ce2aSJacob Faibussowitsch     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2389bea157c4SSatish Balay     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2390f4f49eeaSPierre Jolivet     aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
2391a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2392d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2393bea157c4SSatish Balay         if (baij->ilen[row / bs] > 0) {
2394bea157c4SSatish Balay           baij->ilen[row / bs]       = 1;
2395bea157c4SSatish Balay           baij->j[baij->i[row / bs]] = row / bs;
239626fbe8dcSKarl Rupp 
23979566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(aa, count * bs));
2398a07cd24cSSatish Balay         }
2399563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
24009927e4dfSBarry Smith         for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2401f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2402bea157c4SSatish Balay         baij->ilen[row / bs] = 0;
2403f4df32b1SMatthew Knepley       } /* end (diag == 0.0) */
2404bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
24056bdcaf15SBarry Smith       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2406bea157c4SSatish Balay       for (k = 0; k < count; k++) {
2407d9b7c43dSSatish Balay         aa[0] = zero;
2408d9b7c43dSSatish Balay         aa += bs;
2409d9b7c43dSSatish Balay       }
24109927e4dfSBarry Smith       if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2411d9b7c43dSSatish Balay     }
2412bea157c4SSatish Balay   }
2413bea157c4SSatish Balay 
24149566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, sizes));
24159566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
24163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2417d9b7c43dSSatish Balay }
24181c351548SSatish Balay 
2419ff6a9541SJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2420d71ae5a4SJacob Faibussowitsch {
242197b48c8fSBarry Smith   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
242297b48c8fSBarry Smith   PetscInt           i, j, k, count;
242397b48c8fSBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
242497b48c8fSBarry Smith   PetscScalar        zero = 0.0;
242597b48c8fSBarry Smith   MatScalar         *aa;
242697b48c8fSBarry Smith   const PetscScalar *xx;
242797b48c8fSBarry Smith   PetscScalar       *bb;
242856777dd2SBarry Smith   PetscBool         *zeroed, vecs = PETSC_FALSE;
242997b48c8fSBarry Smith 
243097b48c8fSBarry Smith   PetscFunctionBegin;
2431dd8e379bSPierre Jolivet   /* fix right-hand side if needed */
243297b48c8fSBarry Smith   if (x && b) {
24339566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
24349566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
243556777dd2SBarry Smith     vecs = PETSC_TRUE;
243697b48c8fSBarry Smith   }
243797b48c8fSBarry Smith 
243897b48c8fSBarry Smith   /* zero the columns */
24399566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
244097b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
24415f80ce2aSJacob Faibussowitsch     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
244297b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
244397b48c8fSBarry Smith   }
244497b48c8fSBarry Smith   for (i = 0; i < A->rmap->N; i++) {
244597b48c8fSBarry Smith     if (!zeroed[i]) {
244697b48c8fSBarry Smith       row = i / bs;
244797b48c8fSBarry Smith       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
244897b48c8fSBarry Smith         for (k = 0; k < bs; k++) {
244997b48c8fSBarry Smith           col = bs * baij->j[j] + k;
245097b48c8fSBarry Smith           if (zeroed[col]) {
2451f4f49eeaSPierre Jolivet             aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k;
245256777dd2SBarry Smith             if (vecs) bb[i] -= aa[0] * xx[col];
245397b48c8fSBarry Smith             aa[0] = 0.0;
245497b48c8fSBarry Smith           }
245597b48c8fSBarry Smith         }
245697b48c8fSBarry Smith       }
245756777dd2SBarry Smith     } else if (vecs) bb[i] = diag * xx[i];
245897b48c8fSBarry Smith   }
24599566063dSJacob Faibussowitsch   PetscCall(PetscFree(zeroed));
246056777dd2SBarry Smith   if (vecs) {
24619566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
24629566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
246356777dd2SBarry Smith   }
246497b48c8fSBarry Smith 
246597b48c8fSBarry Smith   /* zero the rows */
246697b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
246797b48c8fSBarry Smith     row   = is_idx[i];
246897b48c8fSBarry Smith     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2469f4f49eeaSPierre Jolivet     aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
247097b48c8fSBarry Smith     for (k = 0; k < count; k++) {
247197b48c8fSBarry Smith       aa[0] = zero;
247297b48c8fSBarry Smith       aa += bs;
247397b48c8fSBarry Smith     }
2474dbbe0bcdSBarry Smith     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
247597b48c8fSBarry Smith   }
24769566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
24773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
247897b48c8fSBarry Smith }
247997b48c8fSBarry Smith 
2480d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2481d71ae5a4SJacob Faibussowitsch {
24822d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2483e2ee6c50SBarry Smith   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2484c1ac3661SBarry Smith   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2485d0f46423SBarry Smith   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2486c1ac3661SBarry Smith   PetscInt     ridx, cidx, bs2                 = a->bs2;
2487ace3abfcSBarry Smith   PetscBool    roworiented = a->roworiented;
2488d8cdefa3SHong Zhang   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
24892d61bbb3SSatish Balay 
24902d61bbb3SSatish Balay   PetscFunctionBegin;
24912d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over added rows */
2492085a36d4SBarry Smith     row  = im[k];
2493085a36d4SBarry Smith     brow = row / bs;
24945ef9f2a5SBarry Smith     if (row < 0) continue;
24956bdcaf15SBarry Smith     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
24968e3a54c0SPierre Jolivet     rp = PetscSafePointerPlusOffset(aj, ai[brow]);
24978e3a54c0SPierre Jolivet     if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
24982d61bbb3SSatish Balay     rmax = imax[brow];
24992d61bbb3SSatish Balay     nrow = ailen[brow];
25002d61bbb3SSatish Balay     low  = 0;
2501c71e6ed7SBarry Smith     high = nrow;
25022d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over added columns */
25035ef9f2a5SBarry Smith       if (in[l] < 0) continue;
25046bdcaf15SBarry Smith       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
25059371c9d4SSatish Balay       col  = in[l];
25069371c9d4SSatish Balay       bcol = col / bs;
25079371c9d4SSatish Balay       ridx = row % bs;
25089371c9d4SSatish Balay       cidx = col % bs;
2509672ba085SHong Zhang       if (!A->structure_only) {
25102d61bbb3SSatish Balay         if (roworiented) {
25115ef9f2a5SBarry Smith           value = v[l + k * n];
25122d61bbb3SSatish Balay         } else {
25132d61bbb3SSatish Balay           value = v[k + l * m];
25142d61bbb3SSatish Balay         }
2515672ba085SHong Zhang       }
25169371c9d4SSatish Balay       if (col <= lastcol) low = 0;
25179371c9d4SSatish Balay       else high = nrow;
2518e2ee6c50SBarry Smith       lastcol = col;
25192d61bbb3SSatish Balay       while (high - low > 7) {
25202d61bbb3SSatish Balay         t = (low + high) / 2;
25212d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
25222d61bbb3SSatish Balay         else low = t;
25232d61bbb3SSatish Balay       }
25242d61bbb3SSatish Balay       for (i = low; i < high; i++) {
25252d61bbb3SSatish Balay         if (rp[i] > bcol) break;
25262d61bbb3SSatish Balay         if (rp[i] == bcol) {
25278e3a54c0SPierre Jolivet           bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2528672ba085SHong Zhang           if (!A->structure_only) {
25292d61bbb3SSatish Balay             if (is == ADD_VALUES) *bap += value;
25302d61bbb3SSatish Balay             else *bap = value;
2531672ba085SHong Zhang           }
25322d61bbb3SSatish Balay           goto noinsert1;
25332d61bbb3SSatish Balay         }
25342d61bbb3SSatish Balay       }
25352d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
25365f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2537672ba085SHong Zhang       if (A->structure_only) {
2538672ba085SHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2539672ba085SHong Zhang       } else {
2540fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2541672ba085SHong Zhang       }
25429371c9d4SSatish Balay       N = nrow++ - 1;
25439371c9d4SSatish Balay       high++;
25442d61bbb3SSatish Balay       /* shift up all the later entries in this row */
25459566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
25462d61bbb3SSatish Balay       rp[i] = bcol;
2547580bdb30SBarry Smith       if (!A->structure_only) {
25489566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
25499566063dSJacob Faibussowitsch         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2550580bdb30SBarry Smith         ap[bs2 * i + bs * cidx + ridx] = value;
2551580bdb30SBarry Smith       }
2552085a36d4SBarry Smith       a->nz++;
25532d61bbb3SSatish Balay     noinsert1:;
25542d61bbb3SSatish Balay       low = i;
25552d61bbb3SSatish Balay     }
25562d61bbb3SSatish Balay     ailen[brow] = nrow;
25572d61bbb3SSatish Balay   }
25583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25592d61bbb3SSatish Balay }
25602d61bbb3SSatish Balay 
2561ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2562d71ae5a4SJacob Faibussowitsch {
25632d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
25642d61bbb3SSatish Balay   Mat          outA;
2565ace3abfcSBarry Smith   PetscBool    row_identity, col_identity;
25662d61bbb3SSatish Balay 
25672d61bbb3SSatish Balay   PetscFunctionBegin;
25685f80ce2aSJacob Faibussowitsch   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
25699566063dSJacob Faibussowitsch   PetscCall(ISIdentity(row, &row_identity));
25709566063dSJacob Faibussowitsch   PetscCall(ISIdentity(col, &col_identity));
25715f80ce2aSJacob Faibussowitsch   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
25722d61bbb3SSatish Balay 
25732d61bbb3SSatish Balay   outA            = inA;
2574d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
25759566063dSJacob Faibussowitsch   PetscCall(PetscFree(inA->solvertype));
25769566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
25772d61bbb3SSatish Balay 
25789566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2579cf242676SKris Buschelman 
25809566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)row));
25819566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
2582c3122656SLisandro Dalcin   a->row = row;
25839566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)col));
25849566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
2585c3122656SLisandro Dalcin   a->col = col;
2586c38d4ed2SBarry Smith 
2587c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
25889566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
25899566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2590c38d4ed2SBarry Smith 
25919566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2592aa624791SPierre Jolivet   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
25939566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(outA, inA, info));
25943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25952d61bbb3SSatish Balay }
2596d9b7c43dSSatish Balay 
2597ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2598d71ae5a4SJacob Faibussowitsch {
259927a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
260027a8da17SBarry Smith 
260127a8da17SBarry Smith   PetscFunctionBegin;
2602ff6a9541SJacob Faibussowitsch   baij->nz = baij->maxnz;
2603ff6a9541SJacob Faibussowitsch   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2604ff6a9541SJacob Faibussowitsch   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
26053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
260627a8da17SBarry Smith }
260727a8da17SBarry Smith 
260827a8da17SBarry Smith /*@
2609d8a51d2aSBarry Smith   MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
261027a8da17SBarry Smith 
261127a8da17SBarry Smith   Input Parameters:
261211a5261eSBarry Smith + mat     - the `MATSEQBAIJ` matrix
2613d8a51d2aSBarry Smith - indices - the block column indices
261427a8da17SBarry Smith 
261515091d37SBarry Smith   Level: advanced
261615091d37SBarry Smith 
261727a8da17SBarry Smith   Notes:
261827a8da17SBarry Smith   This can be called if you have precomputed the nonzero structure of the
261927a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
262011a5261eSBarry Smith   of the `MatSetValues()` operation.
262127a8da17SBarry Smith 
262227a8da17SBarry Smith   You MUST have set the correct numbers of nonzeros per row in the call to
262311a5261eSBarry Smith   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
262427a8da17SBarry Smith 
262511a5261eSBarry Smith   MUST be called before any calls to `MatSetValues()`
262627a8da17SBarry Smith 
26271cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
262827a8da17SBarry Smith @*/
2629d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2630d71ae5a4SJacob Faibussowitsch {
263127a8da17SBarry Smith   PetscFunctionBegin;
26320700a824SBarry Smith   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
26334f572ea9SToby Isaac   PetscAssertPointer(indices, 2);
2634810441c8SPierre Jolivet   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
26353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
263627a8da17SBarry Smith }
263727a8da17SBarry Smith 
263866976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2639d71ae5a4SJacob Faibussowitsch {
2640273d9f13SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2641c1ac3661SBarry Smith   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2642273d9f13SBarry Smith   PetscReal    atmp;
264387828ca2SBarry Smith   PetscScalar *x, zero = 0.0;
2644273d9f13SBarry Smith   MatScalar   *aa;
2645c1ac3661SBarry Smith   PetscInt     ncols, brow, krow, kcol;
2646273d9f13SBarry Smith 
2647273d9f13SBarry Smith   PetscFunctionBegin;
26485f80ce2aSJacob Faibussowitsch   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2649d0f46423SBarry Smith   bs  = A->rmap->bs;
2650273d9f13SBarry Smith   aa  = a->a;
2651273d9f13SBarry Smith   ai  = a->i;
2652273d9f13SBarry Smith   aj  = a->j;
2653273d9f13SBarry Smith   mbs = a->mbs;
2654273d9f13SBarry Smith 
26559566063dSJacob Faibussowitsch   PetscCall(VecSet(v, zero));
26569566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
26579566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(v, &n));
26585f80ce2aSJacob Faibussowitsch   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2659273d9f13SBarry Smith   for (i = 0; i < mbs; i++) {
26609371c9d4SSatish Balay     ncols = ai[1] - ai[0];
26619371c9d4SSatish Balay     ai++;
2662273d9f13SBarry Smith     brow = bs * i;
2663273d9f13SBarry Smith     for (j = 0; j < ncols; j++) {
2664273d9f13SBarry Smith       for (kcol = 0; kcol < bs; kcol++) {
2665273d9f13SBarry Smith         for (krow = 0; krow < bs; krow++) {
26669371c9d4SSatish Balay           atmp = PetscAbsScalar(*aa);
26679371c9d4SSatish Balay           aa++;
2668273d9f13SBarry Smith           row = brow + krow; /* row index */
26699371c9d4SSatish Balay           if (PetscAbsScalar(x[row]) < atmp) {
26709371c9d4SSatish Balay             x[row] = atmp;
26719371c9d4SSatish Balay             if (idx) idx[row] = bs * (*aj) + kcol;
26729371c9d4SSatish Balay           }
2673273d9f13SBarry Smith         }
2674273d9f13SBarry Smith       }
2675273d9f13SBarry Smith       aj++;
2676273d9f13SBarry Smith     }
2677273d9f13SBarry Smith   }
26789566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
26793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2680273d9f13SBarry Smith }
2681273d9f13SBarry Smith 
2682eede4a3fSMark Adams static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2683eede4a3fSMark Adams {
2684eede4a3fSMark Adams   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2685eede4a3fSMark Adams   PetscInt     i, j, n, row, bs, *ai, mbs;
2686eede4a3fSMark Adams   PetscReal    atmp;
2687eede4a3fSMark Adams   PetscScalar *x, zero = 0.0;
2688eede4a3fSMark Adams   MatScalar   *aa;
2689eede4a3fSMark Adams   PetscInt     ncols, brow, krow, kcol;
2690eede4a3fSMark Adams 
2691eede4a3fSMark Adams   PetscFunctionBegin;
2692eede4a3fSMark Adams   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2693eede4a3fSMark Adams   bs  = A->rmap->bs;
2694eede4a3fSMark Adams   aa  = a->a;
2695eede4a3fSMark Adams   ai  = a->i;
2696eede4a3fSMark Adams   mbs = a->mbs;
2697eede4a3fSMark Adams 
2698eede4a3fSMark Adams   PetscCall(VecSet(v, zero));
2699eede4a3fSMark Adams   PetscCall(VecGetArrayWrite(v, &x));
2700eede4a3fSMark Adams   PetscCall(VecGetLocalSize(v, &n));
2701eede4a3fSMark Adams   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2702eede4a3fSMark Adams   for (i = 0; i < mbs; i++) {
2703eede4a3fSMark Adams     ncols = ai[1] - ai[0];
2704eede4a3fSMark Adams     ai++;
2705eede4a3fSMark Adams     brow = bs * i;
2706eede4a3fSMark Adams     for (j = 0; j < ncols; j++) {
2707eede4a3fSMark Adams       for (kcol = 0; kcol < bs; kcol++) {
2708eede4a3fSMark Adams         for (krow = 0; krow < bs; krow++) {
2709eede4a3fSMark Adams           atmp = PetscAbsScalar(*aa);
2710eede4a3fSMark Adams           aa++;
2711eede4a3fSMark Adams           row = brow + krow; /* row index */
2712eede4a3fSMark Adams           x[row] += atmp;
2713eede4a3fSMark Adams         }
2714eede4a3fSMark Adams       }
2715eede4a3fSMark Adams     }
2716eede4a3fSMark Adams   }
2717eede4a3fSMark Adams   PetscCall(VecRestoreArrayWrite(v, &x));
2718eede4a3fSMark Adams   PetscFunctionReturn(PETSC_SUCCESS);
2719eede4a3fSMark Adams }
2720eede4a3fSMark Adams 
272166976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2722d71ae5a4SJacob Faibussowitsch {
27233c896bc6SHong Zhang   PetscFunctionBegin;
27243c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
27253c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
27263c896bc6SHong Zhang     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
27273c896bc6SHong Zhang     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2728d88c0aacSHong Zhang     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
27293c896bc6SHong Zhang 
27305f80ce2aSJacob Faibussowitsch     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
27315f80ce2aSJacob Faibussowitsch     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
27329566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
27339566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)B));
27343c896bc6SHong Zhang   } else {
27359566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
27363c896bc6SHong Zhang   }
27373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
27383c896bc6SHong Zhang }
27393c896bc6SHong Zhang 
2740d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2741d71ae5a4SJacob Faibussowitsch {
2742f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27436e111a19SKarl Rupp 
2744f2a5309cSSatish Balay   PetscFunctionBegin;
2745f2a5309cSSatish Balay   *array = a->a;
27463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2747f2a5309cSSatish Balay }
2748f2a5309cSSatish Balay 
2749d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2750d71ae5a4SJacob Faibussowitsch {
2751f2a5309cSSatish Balay   PetscFunctionBegin;
2752cda14afcSprj-   *array = NULL;
27533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2754f2a5309cSSatish Balay }
2755f2a5309cSSatish Balay 
2756d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2757d71ae5a4SJacob Faibussowitsch {
2758b264fe52SHong Zhang   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
275952768537SHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
276052768537SHong Zhang   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
276152768537SHong Zhang 
276252768537SHong Zhang   PetscFunctionBegin;
276352768537SHong Zhang   /* Set the number of nonzeros in the new matrix */
27649566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
27653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
276652768537SHong Zhang }
276752768537SHong Zhang 
2768d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2769d71ae5a4SJacob Faibussowitsch {
277042ee4b1aSHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
277131ce2d13SHong Zhang   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2772e838b9e7SJed Brown   PetscBLASInt one = 1;
277342ee4b1aSHong Zhang 
277442ee4b1aSHong Zhang   PetscFunctionBegin;
2775134adf20SPierre Jolivet   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2776134adf20SPierre Jolivet     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2777134adf20SPierre Jolivet     if (e) {
27789566063dSJacob Faibussowitsch       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2779134adf20SPierre Jolivet       if (e) {
27809566063dSJacob Faibussowitsch         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2781134adf20SPierre Jolivet         if (e) str = SAME_NONZERO_PATTERN;
2782134adf20SPierre Jolivet       }
2783134adf20SPierre Jolivet     }
278454c59aa7SJacob Faibussowitsch     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2785134adf20SPierre Jolivet   }
278642ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2787f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2788c5df96a5SBarry Smith     PetscBLASInt bnz;
27899566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2790792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
27919566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2792ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
27939566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
279442ee4b1aSHong Zhang   } else {
279552768537SHong Zhang     Mat       B;
279652768537SHong Zhang     PetscInt *nnz;
279754c59aa7SJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
27989566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
27999566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
28009566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
28019566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
28029566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
28039566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
28049566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
28059566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
28069566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
28079566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
28089566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
280942ee4b1aSHong Zhang   }
28103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
281142ee4b1aSHong Zhang }
281242ee4b1aSHong Zhang 
2813d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2814d71ae5a4SJacob Faibussowitsch {
2815ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
28162726fb6dSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
28172726fb6dSPierre Jolivet   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
28182726fb6dSPierre Jolivet   MatScalar   *aa = a->a;
28192726fb6dSPierre Jolivet 
28202726fb6dSPierre Jolivet   PetscFunctionBegin;
28212726fb6dSPierre Jolivet   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
28223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2823ff6a9541SJacob Faibussowitsch #else
2824ff6a9541SJacob Faibussowitsch   (void)A;
2825ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2826ff6a9541SJacob Faibussowitsch #endif
28272726fb6dSPierre Jolivet }
28282726fb6dSPierre Jolivet 
2829ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2830d71ae5a4SJacob Faibussowitsch {
2831ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
283299cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
283399cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2834dd6ea824SBarry Smith   MatScalar   *aa = a->a;
283599cafbc1SBarry Smith 
283699cafbc1SBarry Smith   PetscFunctionBegin;
283799cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
28383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2839ff6a9541SJacob Faibussowitsch #else
2840ff6a9541SJacob Faibussowitsch   (void)A;
2841ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2842ff6a9541SJacob Faibussowitsch #endif
284399cafbc1SBarry Smith }
284499cafbc1SBarry Smith 
2845ff6a9541SJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2846d71ae5a4SJacob Faibussowitsch {
2847ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
284899cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
284999cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2850dd6ea824SBarry Smith   MatScalar   *aa = a->a;
285199cafbc1SBarry Smith 
285299cafbc1SBarry Smith   PetscFunctionBegin;
285399cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
28543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2855ff6a9541SJacob Faibussowitsch #else
2856ff6a9541SJacob Faibussowitsch   (void)A;
2857ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2858ff6a9541SJacob Faibussowitsch #endif
285999cafbc1SBarry Smith }
286099cafbc1SBarry Smith 
28613acb8795SBarry Smith /*
28622479783cSJose E. Roman     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
28633acb8795SBarry Smith */
2864ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2865d71ae5a4SJacob Faibussowitsch {
28663acb8795SBarry Smith   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
28673acb8795SBarry Smith   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
28683acb8795SBarry Smith   PetscInt     nz = a->i[m], row, *jj, mr, col;
28693acb8795SBarry Smith 
28703acb8795SBarry Smith   PetscFunctionBegin;
28713acb8795SBarry Smith   *nn = n;
28723ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
28735f80ce2aSJacob Faibussowitsch   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
28749566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28759566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28773acb8795SBarry Smith   jj = a->j;
2878ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
28793acb8795SBarry Smith   cia[0] = oshift;
2880ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28819566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
28823acb8795SBarry Smith   jj = a->j;
28833acb8795SBarry Smith   for (row = 0; row < m; row++) {
28843acb8795SBarry Smith     mr = a->i[row + 1] - a->i[row];
28853acb8795SBarry Smith     for (i = 0; i < mr; i++) {
28863acb8795SBarry Smith       col = *jj++;
288726fbe8dcSKarl Rupp 
28883acb8795SBarry Smith       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
28893acb8795SBarry Smith     }
28903acb8795SBarry Smith   }
28919566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
28929371c9d4SSatish Balay   *ia = cia;
28939371c9d4SSatish Balay   *ja = cja;
28943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28953acb8795SBarry Smith }
28963acb8795SBarry Smith 
2897ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2898d71ae5a4SJacob Faibussowitsch {
28993acb8795SBarry Smith   PetscFunctionBegin;
29003ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
29019566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ia));
29029566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ja));
29033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29043acb8795SBarry Smith }
29053acb8795SBarry Smith 
2906525d23c0SHong Zhang /*
2907525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2908525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2909040ebd07SHong Zhang  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2910525d23c0SHong Zhang  */
2911d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2912d71ae5a4SJacob Faibussowitsch {
2913525d23c0SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2914c0349474SHong Zhang   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2915525d23c0SHong Zhang   PetscInt     nz = a->i[m], row, *jj, mr, col;
2916525d23c0SHong Zhang   PetscInt    *cspidx;
2917f6d58c54SBarry Smith 
2918f6d58c54SBarry Smith   PetscFunctionBegin;
2919525d23c0SHong Zhang   *nn = n;
29203ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2921f6d58c54SBarry Smith 
29229566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
29239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
29249566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
29259566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cspidx));
2926525d23c0SHong Zhang   jj = a->j;
2927ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2928525d23c0SHong Zhang   cia[0] = oshift;
2929ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
29309566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
2931525d23c0SHong Zhang   jj = a->j;
2932525d23c0SHong Zhang   for (row = 0; row < m; row++) {
2933525d23c0SHong Zhang     mr = a->i[row + 1] - a->i[row];
2934525d23c0SHong Zhang     for (i = 0; i < mr; i++) {
2935525d23c0SHong Zhang       col                                         = *jj++;
2936525d23c0SHong Zhang       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2937525d23c0SHong Zhang       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2938525d23c0SHong Zhang     }
2939525d23c0SHong Zhang   }
29409566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
2941071fcb05SBarry Smith   *ia    = cia;
2942071fcb05SBarry Smith   *ja    = cja;
2943525d23c0SHong Zhang   *spidx = cspidx;
29443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2945f6d58c54SBarry Smith }
2946f6d58c54SBarry Smith 
2947d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2948d71ae5a4SJacob Faibussowitsch {
2949525d23c0SHong Zhang   PetscFunctionBegin;
29509566063dSJacob Faibussowitsch   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
29519566063dSJacob Faibussowitsch   PetscCall(PetscFree(*spidx));
29523ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2953f6d58c54SBarry Smith }
295499cafbc1SBarry Smith 
295566976f2fSJacob Faibussowitsch static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2956d71ae5a4SJacob Faibussowitsch {
29577d68702bSBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
29587d68702bSBarry Smith 
29597d68702bSBarry Smith   PetscFunctionBegin;
296048a46eb9SPierre Jolivet   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
29619566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
29623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29637d68702bSBarry Smith }
29647d68702bSBarry Smith 
296517ea310bSPierre Jolivet PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
296617ea310bSPierre Jolivet {
296717ea310bSPierre Jolivet   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
296817ea310bSPierre Jolivet   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
296917ea310bSPierre Jolivet   PetscInt     m = A->rmap->N, *ailen = a->ilen;
297017ea310bSPierre Jolivet   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
297117ea310bSPierre Jolivet   MatScalar   *aa = a->a, *ap;
297217ea310bSPierre Jolivet   PetscBool    zero;
297317ea310bSPierre Jolivet 
297417ea310bSPierre Jolivet   PetscFunctionBegin;
297517ea310bSPierre Jolivet   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
297617ea310bSPierre Jolivet   if (m) rmax = ailen[0];
297717ea310bSPierre Jolivet   for (i = 1; i <= mbs; i++) {
297817ea310bSPierre Jolivet     for (k = ai[i - 1]; k < ai[i]; k++) {
297917ea310bSPierre Jolivet       zero = PETSC_TRUE;
298017ea310bSPierre Jolivet       ap   = aa + bs2 * k;
298117ea310bSPierre Jolivet       for (j = 0; j < bs2 && zero; j++) {
298217ea310bSPierre Jolivet         if (ap[j] != 0.0) zero = PETSC_FALSE;
298317ea310bSPierre Jolivet       }
298417ea310bSPierre Jolivet       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
298517ea310bSPierre Jolivet       else {
298617ea310bSPierre Jolivet         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
298717ea310bSPierre Jolivet         aj[k - fshift] = aj[k];
298817ea310bSPierre Jolivet         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
298917ea310bSPierre Jolivet       }
299017ea310bSPierre Jolivet     }
299117ea310bSPierre Jolivet     ai[i - 1] -= fshift_prev;
299217ea310bSPierre Jolivet     fshift_prev  = fshift;
299317ea310bSPierre Jolivet     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
299417ea310bSPierre Jolivet     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
299517ea310bSPierre Jolivet     rmax = PetscMax(rmax, ailen[i - 1]);
299617ea310bSPierre Jolivet   }
299717ea310bSPierre Jolivet   if (fshift) {
299817ea310bSPierre Jolivet     if (mbs) {
299917ea310bSPierre Jolivet       ai[mbs] -= fshift;
300017ea310bSPierre Jolivet       a->nz = ai[mbs];
300117ea310bSPierre Jolivet     }
300217ea310bSPierre Jolivet     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
300317ea310bSPierre Jolivet     A->nonzerostate++;
300417ea310bSPierre Jolivet     A->info.nz_unneeded += (PetscReal)fshift;
300517ea310bSPierre Jolivet     a->rmax = rmax;
300617ea310bSPierre Jolivet     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
300717ea310bSPierre Jolivet     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
300817ea310bSPierre Jolivet   }
300917ea310bSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
301017ea310bSPierre Jolivet }
301117ea310bSPierre Jolivet 
3012dec0b466SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
3013cc2dc46cSBarry Smith                                        MatGetRow_SeqBAIJ,
3014cc2dc46cSBarry Smith                                        MatRestoreRow_SeqBAIJ,
3015cc2dc46cSBarry Smith                                        MatMult_SeqBAIJ_N,
301697304618SKris Buschelman                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
30177c922b88SBarry Smith                                        MatMultTranspose_SeqBAIJ,
30187c922b88SBarry Smith                                        MatMultTransposeAdd_SeqBAIJ,
3019f4259b30SLisandro Dalcin                                        NULL,
3020f4259b30SLisandro Dalcin                                        NULL,
3021f4259b30SLisandro Dalcin                                        NULL,
3022f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
3023cc2dc46cSBarry Smith                                        MatLUFactor_SeqBAIJ,
3024f4259b30SLisandro Dalcin                                        NULL,
3025f4259b30SLisandro Dalcin                                        NULL,
3026f2501298SSatish Balay                                        MatTranspose_SeqBAIJ,
302797304618SKris Buschelman                                        /* 15*/ MatGetInfo_SeqBAIJ,
3028cc2dc46cSBarry Smith                                        MatEqual_SeqBAIJ,
3029cc2dc46cSBarry Smith                                        MatGetDiagonal_SeqBAIJ,
3030cc2dc46cSBarry Smith                                        MatDiagonalScale_SeqBAIJ,
3031cc2dc46cSBarry Smith                                        MatNorm_SeqBAIJ,
3032f4259b30SLisandro Dalcin                                        /* 20*/ NULL,
3033cc2dc46cSBarry Smith                                        MatAssemblyEnd_SeqBAIJ,
3034cc2dc46cSBarry Smith                                        MatSetOption_SeqBAIJ,
3035cc2dc46cSBarry Smith                                        MatZeroEntries_SeqBAIJ,
3036d519adbfSMatthew Knepley                                        /* 24*/ MatZeroRows_SeqBAIJ,
3037f4259b30SLisandro Dalcin                                        NULL,
3038f4259b30SLisandro Dalcin                                        NULL,
3039f4259b30SLisandro Dalcin                                        NULL,
3040f4259b30SLisandro Dalcin                                        NULL,
304126cec326SBarry Smith                                        /* 29*/ MatSetUp_Seq_Hash,
3042f4259b30SLisandro Dalcin                                        NULL,
3043f4259b30SLisandro Dalcin                                        NULL,
3044f4259b30SLisandro Dalcin                                        NULL,
3045f4259b30SLisandro Dalcin                                        NULL,
3046d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_SeqBAIJ,
3047f4259b30SLisandro Dalcin                                        NULL,
3048f4259b30SLisandro Dalcin                                        NULL,
3049cc2dc46cSBarry Smith                                        MatILUFactor_SeqBAIJ,
3050f4259b30SLisandro Dalcin                                        NULL,
3051d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_SeqBAIJ,
30527dae84e0SHong Zhang                                        MatCreateSubMatrices_SeqBAIJ,
3053cc2dc46cSBarry Smith                                        MatIncreaseOverlap_SeqBAIJ,
3054cc2dc46cSBarry Smith                                        MatGetValues_SeqBAIJ,
30553c896bc6SHong Zhang                                        MatCopy_SeqBAIJ,
3056f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
3057cc2dc46cSBarry Smith                                        MatScale_SeqBAIJ,
30587d68702bSBarry Smith                                        MatShift_SeqBAIJ,
3059f4259b30SLisandro Dalcin                                        NULL,
306097b48c8fSBarry Smith                                        MatZeroRowsColumns_SeqBAIJ,
3061f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
30623b2fbd54SBarry Smith                                        MatGetRowIJ_SeqBAIJ,
306392c4ed94SBarry Smith                                        MatRestoreRowIJ_SeqBAIJ,
30643acb8795SBarry Smith                                        MatGetColumnIJ_SeqBAIJ,
30653acb8795SBarry Smith                                        MatRestoreColumnIJ_SeqBAIJ,
306693dfae19SHong Zhang                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3067f4259b30SLisandro Dalcin                                        NULL,
3068f4259b30SLisandro Dalcin                                        NULL,
3069090001bdSToby Isaac                                        NULL,
3070d3825aa8SBarry Smith                                        MatSetValuesBlocked_SeqBAIJ,
30717dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3072b9b97703SBarry Smith                                        MatDestroy_SeqBAIJ,
3073b9b97703SBarry Smith                                        MatView_SeqBAIJ,
3074f4259b30SLisandro Dalcin                                        NULL,
3075f4259b30SLisandro Dalcin                                        NULL,
3076f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
3077f4259b30SLisandro Dalcin                                        NULL,
3078f4259b30SLisandro Dalcin                                        NULL,
3079f4259b30SLisandro Dalcin                                        NULL,
3080f4259b30SLisandro Dalcin                                        NULL,
3081d519adbfSMatthew Knepley                                        /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
3082f4259b30SLisandro Dalcin                                        NULL,
3083c87e5d42SMatthew Knepley                                        MatConvert_Basic,
3084f4259b30SLisandro Dalcin                                        NULL,
3085f4259b30SLisandro Dalcin                                        NULL,
3086f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
3087f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
3088f4259b30SLisandro Dalcin                                        NULL,
3089f4259b30SLisandro Dalcin                                        NULL,
3090f4259b30SLisandro Dalcin                                        NULL,
3091f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
3092f4259b30SLisandro Dalcin                                        NULL,
3093f4259b30SLisandro Dalcin                                        NULL,
3094f4259b30SLisandro Dalcin                                        NULL,
30955bba2384SShri Abhyankar                                        MatLoad_SeqBAIJ,
3096f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
3097f4259b30SLisandro Dalcin                                        NULL,
3098f4259b30SLisandro Dalcin                                        NULL,
3099f4259b30SLisandro Dalcin                                        NULL,
3100f4259b30SLisandro Dalcin                                        NULL,
3101f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
3102f4259b30SLisandro Dalcin                                        NULL,
3103f4259b30SLisandro Dalcin                                        NULL,
3104f4259b30SLisandro Dalcin                                        NULL,
3105f4259b30SLisandro Dalcin                                        NULL,
3106f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
3107f4259b30SLisandro Dalcin                                        NULL,
3108f4259b30SLisandro Dalcin                                        NULL,
3109f4259b30SLisandro Dalcin                                        NULL,
3110f4259b30SLisandro Dalcin                                        NULL,
3111f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
3112f4259b30SLisandro Dalcin                                        NULL,
3113f4259b30SLisandro Dalcin                                        NULL,
31142726fb6dSPierre Jolivet                                        MatConjugate_SeqBAIJ,
3115f4259b30SLisandro Dalcin                                        NULL,
3116f4259b30SLisandro Dalcin                                        /*104*/ NULL,
311799cafbc1SBarry Smith                                        MatRealPart_SeqBAIJ,
31182af78befSBarry Smith                                        MatImaginaryPart_SeqBAIJ,
3119f4259b30SLisandro Dalcin                                        NULL,
3120f4259b30SLisandro Dalcin                                        NULL,
3121f4259b30SLisandro Dalcin                                        /*109*/ NULL,
3122f4259b30SLisandro Dalcin                                        NULL,
3123f4259b30SLisandro Dalcin                                        NULL,
3124f4259b30SLisandro Dalcin                                        NULL,
3125547795f9SHong Zhang                                        MatMissingDiagonal_SeqBAIJ,
3126f4259b30SLisandro Dalcin                                        /*114*/ NULL,
3127f4259b30SLisandro Dalcin                                        NULL,
3128f4259b30SLisandro Dalcin                                        NULL,
3129f4259b30SLisandro Dalcin                                        NULL,
3130f4259b30SLisandro Dalcin                                        NULL,
3131f4259b30SLisandro Dalcin                                        /*119*/ NULL,
3132f4259b30SLisandro Dalcin                                        NULL,
3133547795f9SHong Zhang                                        MatMultHermitianTranspose_SeqBAIJ,
3134d6037b41SHong Zhang                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3135f4259b30SLisandro Dalcin                                        NULL,
3136f4259b30SLisandro Dalcin                                        /*124*/ NULL,
3137857cbf51SRichard Tran Mills                                        MatGetColumnReductions_SeqBAIJ,
31383964eb88SJed Brown                                        MatInvertBlockDiagonal_SeqBAIJ,
3139f4259b30SLisandro Dalcin                                        NULL,
3140f4259b30SLisandro Dalcin                                        NULL,
3141f4259b30SLisandro Dalcin                                        /*129*/ NULL,
3142f4259b30SLisandro Dalcin                                        NULL,
3143f4259b30SLisandro Dalcin                                        NULL,
3144f4259b30SLisandro Dalcin                                        NULL,
3145f4259b30SLisandro Dalcin                                        NULL,
3146f4259b30SLisandro Dalcin                                        /*134*/ NULL,
3147f4259b30SLisandro Dalcin                                        NULL,
3148f4259b30SLisandro Dalcin                                        NULL,
3149f4259b30SLisandro Dalcin                                        NULL,
3150f4259b30SLisandro Dalcin                                        NULL,
315146533700Sstefano_zampini                                        /*139*/ MatSetBlockSizes_Default,
3152f4259b30SLisandro Dalcin                                        NULL,
3153f4259b30SLisandro Dalcin                                        NULL,
3154bdf6f3fcSHong Zhang                                        MatFDColoringSetUp_SeqXAIJ,
3155f4259b30SLisandro Dalcin                                        NULL,
315686e85357SHong Zhang                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3157d70f29a3SPierre Jolivet                                        MatDestroySubMatrices_SeqBAIJ,
3158d70f29a3SPierre Jolivet                                        NULL,
315999a7f59eSMark Adams                                        NULL,
316099a7f59eSMark Adams                                        NULL,
31617fb60732SBarry Smith                                        NULL,
31627fb60732SBarry Smith                                        /*150*/ NULL,
3163eede4a3fSMark Adams                                        MatEliminateZeros_SeqBAIJ,
31644cc2b5b5SPierre Jolivet                                        MatGetRowSumAbs_SeqBAIJ,
31654cc2b5b5SPierre Jolivet                                        NULL};
31662593348eSBarry Smith 
3167ff6a9541SJacob Faibussowitsch static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3168d71ae5a4SJacob Faibussowitsch {
31693e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
31708ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
31713e90b805SBarry Smith 
31723e90b805SBarry Smith   PetscFunctionBegin;
31735f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
31743e90b805SBarry Smith 
31753e90b805SBarry Smith   /* allocate space for values if not already there */
3176ff6a9541SJacob Faibussowitsch   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
31773e90b805SBarry Smith 
31783e90b805SBarry Smith   /* copy values over */
31799566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
31803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31813e90b805SBarry Smith }
31823e90b805SBarry Smith 
3183ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3184d71ae5a4SJacob Faibussowitsch {
31853e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
31868ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
31873e90b805SBarry Smith 
31883e90b805SBarry Smith   PetscFunctionBegin;
31895f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
31905f80ce2aSJacob Faibussowitsch   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
31913e90b805SBarry Smith 
31923e90b805SBarry Smith   /* copy values over */
31939566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
31943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31953e90b805SBarry Smith }
31963e90b805SBarry Smith 
3197cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3198cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3199273d9f13SBarry Smith 
3200f9663b93SPierre Jolivet PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3201d71ae5a4SJacob Faibussowitsch {
3202ad79cf63SBarry Smith   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3203535b19f3SBarry Smith   PetscInt     i, mbs, nbs, bs2;
32048afaa268SBarry Smith   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3205a23d5eceSKris Buschelman 
3206a23d5eceSKris Buschelman   PetscFunctionBegin;
3207ad79cf63SBarry Smith   if (B->hash_active) {
3208ad79cf63SBarry Smith     PetscInt bs;
3209aea10558SJacob Faibussowitsch     B->ops[0] = b->cops;
3210ad79cf63SBarry Smith     PetscCall(PetscHMapIJVDestroy(&b->ht));
3211ad79cf63SBarry Smith     PetscCall(MatGetBlockSize(B, &bs));
3212ad79cf63SBarry Smith     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3213ad79cf63SBarry Smith     PetscCall(PetscFree(b->dnz));
3214ad79cf63SBarry Smith     PetscCall(PetscFree(b->bdnz));
3215ad79cf63SBarry Smith     B->hash_active = PETSC_FALSE;
3216ad79cf63SBarry Smith   }
32172576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3218ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
3219ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
3220ab93d7beSBarry Smith     nz             = 0;
3221ab93d7beSBarry Smith   }
32228c07d4e3SBarry Smith 
32239566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
32249566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
32259566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
32269566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3227899cda47SBarry Smith 
3228899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
3229899cda47SBarry Smith 
3230d0f46423SBarry Smith   mbs = B->rmap->n / bs;
3231d0f46423SBarry Smith   nbs = B->cmap->n / bs;
3232a23d5eceSKris Buschelman   bs2 = bs * bs;
3233a23d5eceSKris Buschelman 
32345f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3235a23d5eceSKris Buschelman 
3236a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
32375f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3238a23d5eceSKris Buschelman   if (nnz) {
3239a23d5eceSKris Buschelman     for (i = 0; i < mbs; i++) {
32405f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
32415f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3242a23d5eceSKris Buschelman     }
3243a23d5eceSKris Buschelman   }
3244a23d5eceSKris Buschelman 
3245d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
32469566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3247d0609cedSBarry Smith   PetscOptionsEnd();
32488c07d4e3SBarry Smith 
3249a23d5eceSKris Buschelman   if (!flg) {
3250a23d5eceSKris Buschelman     switch (bs) {
3251a23d5eceSKris Buschelman     case 1:
3252a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
3253a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3254a23d5eceSKris Buschelman       break;
3255a23d5eceSKris Buschelman     case 2:
3256a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
3257a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3258a23d5eceSKris Buschelman       break;
3259a23d5eceSKris Buschelman     case 3:
3260a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
3261a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3262a23d5eceSKris Buschelman       break;
3263a23d5eceSKris Buschelman     case 4:
3264a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
3265a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3266a23d5eceSKris Buschelman       break;
3267a23d5eceSKris Buschelman     case 5:
3268a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
3269a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3270a23d5eceSKris Buschelman       break;
3271a23d5eceSKris Buschelman     case 6:
3272a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
3273a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3274a23d5eceSKris Buschelman       break;
3275a23d5eceSKris Buschelman     case 7:
3276a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
3277a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3278a23d5eceSKris Buschelman       break;
32799371c9d4SSatish Balay     case 9: {
32806679dcc1SBarry Smith       PetscInt version = 1;
32819566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32826679dcc1SBarry Smith       switch (version) {
32835f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
32846679dcc1SBarry Smith       case 1:
328596e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
328696e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
32879566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32886679dcc1SBarry Smith         break;
32896679dcc1SBarry Smith #endif
32906679dcc1SBarry Smith       default:
329196e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_N;
329296e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
32939566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
329496e086a2SDaniel Kokron         break;
32956679dcc1SBarry Smith       }
32966679dcc1SBarry Smith       break;
32976679dcc1SBarry Smith     }
3298ebada01fSBarry Smith     case 11:
3299ebada01fSBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_11;
3300ebada01fSBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3301ebada01fSBarry Smith       break;
33029371c9d4SSatish Balay     case 12: {
33036679dcc1SBarry Smith       PetscInt version = 1;
33049566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
33056679dcc1SBarry Smith       switch (version) {
33066679dcc1SBarry Smith       case 1:
33076679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
33086679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
33099566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33108ab949d8SShri Abhyankar         break;
33116679dcc1SBarry Smith       case 2:
33126679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
33136679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
33149566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33156679dcc1SBarry Smith         break;
33166679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
33176679dcc1SBarry Smith       case 3:
33186679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
33196679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
33209566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
33216679dcc1SBarry Smith         break;
33226679dcc1SBarry Smith #endif
3323a23d5eceSKris Buschelman       default:
3324a23d5eceSKris Buschelman         B->ops->mult    = MatMult_SeqBAIJ_N;
3325a23d5eceSKris Buschelman         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
33269566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
33276679dcc1SBarry Smith         break;
33286679dcc1SBarry Smith       }
33296679dcc1SBarry Smith       break;
33306679dcc1SBarry Smith     }
33319371c9d4SSatish Balay     case 15: {
33326679dcc1SBarry Smith       PetscInt version = 1;
33339566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
33346679dcc1SBarry Smith       switch (version) {
33356679dcc1SBarry Smith       case 1:
33366679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
33379566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33386679dcc1SBarry Smith         break;
33396679dcc1SBarry Smith       case 2:
33406679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
33419566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33426679dcc1SBarry Smith         break;
33436679dcc1SBarry Smith       case 3:
33446679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
33459566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33466679dcc1SBarry Smith         break;
33476679dcc1SBarry Smith       case 4:
33486679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
33499566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
33506679dcc1SBarry Smith         break;
33516679dcc1SBarry Smith       default:
33526679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_N;
33539566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
33546679dcc1SBarry Smith         break;
33556679dcc1SBarry Smith       }
33566679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
33576679dcc1SBarry Smith       break;
33586679dcc1SBarry Smith     }
33596679dcc1SBarry Smith     default:
33606679dcc1SBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_N;
33616679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
33629566063dSJacob Faibussowitsch       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3363a23d5eceSKris Buschelman       break;
3364a23d5eceSKris Buschelman     }
3365a23d5eceSKris Buschelman   }
3366e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
3367a23d5eceSKris Buschelman   b->mbs      = mbs;
3368a23d5eceSKris Buschelman   b->nbs      = nbs;
3369ab93d7beSBarry Smith   if (!skipallocation) {
33702ee49352SLisandro Dalcin     if (!b->imax) {
33719566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
337226fbe8dcSKarl Rupp 
33734fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
33742ee49352SLisandro Dalcin     }
3375ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
337626fbe8dcSKarl Rupp     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3377a23d5eceSKris Buschelman     if (!nnz) {
3378a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3379c62bd62aSJed Brown       else if (nz < 0) nz = 1;
33805d2a9ed1SStefano Zampini       nz = PetscMin(nz, nbs);
3381a23d5eceSKris Buschelman       for (i = 0; i < mbs; i++) b->imax[i] = nz;
33829566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, mbs, &nz));
3383a23d5eceSKris Buschelman     } else {
3384c73702f5SBarry Smith       PetscInt64 nz64 = 0;
33859371c9d4SSatish Balay       for (i = 0; i < mbs; i++) {
33869371c9d4SSatish Balay         b->imax[i] = nnz[i];
33879371c9d4SSatish Balay         nz64 += nnz[i];
33889371c9d4SSatish Balay       }
33899566063dSJacob Faibussowitsch       PetscCall(PetscIntCast(nz64, &nz));
3390a23d5eceSKris Buschelman     }
3391a23d5eceSKris Buschelman 
3392a23d5eceSKris Buschelman     /* allocate the matrix space */
33939566063dSJacob Faibussowitsch     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
33949f0612e4SBarry Smith     PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
33959f0612e4SBarry Smith     PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3396672ba085SHong Zhang     if (B->structure_only) {
33979f0612e4SBarry Smith       b->free_a = PETSC_FALSE;
3398672ba085SHong Zhang     } else {
33996679dcc1SBarry Smith       PetscInt nzbs2 = 0;
34009566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
34019f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
34029f0612e4SBarry Smith       b->free_a = PETSC_TRUE;
34039566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(b->a, nz * bs2));
3404672ba085SHong Zhang     }
3405672ba085SHong Zhang     b->free_ij = PETSC_TRUE;
34069f0612e4SBarry Smith     PetscCall(PetscArrayzero(b->j, nz));
3407672ba085SHong Zhang 
3408a23d5eceSKris Buschelman     b->i[0] = 0;
3409ad540459SPierre Jolivet     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3410e811da20SHong Zhang   } else {
3411e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3412e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3413ab93d7beSBarry Smith   }
3414a23d5eceSKris Buschelman 
3415a23d5eceSKris Buschelman   b->bs2              = bs2;
3416a23d5eceSKris Buschelman   b->mbs              = mbs;
3417a23d5eceSKris Buschelman   b->nz               = 0;
3418b32cb4a7SJed Brown   b->maxnz            = nz;
3419b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3420cb7b82ddSBarry Smith   B->was_assembled    = PETSC_FALSE;
3421cb7b82ddSBarry Smith   B->assembled        = PETSC_FALSE;
34229566063dSJacob Faibussowitsch   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
34233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3424a23d5eceSKris Buschelman }
3425a23d5eceSKris Buschelman 
342666976f2fSJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3427d71ae5a4SJacob Faibussowitsch {
3428725b52f3SLisandro Dalcin   PetscInt     i, m, nz, nz_max = 0, *nnz;
3429f4259b30SLisandro Dalcin   PetscScalar *values      = NULL;
3430d47bf9aaSJed Brown   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3431725b52f3SLisandro Dalcin 
3432725b52f3SLisandro Dalcin   PetscFunctionBegin;
34335f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
34349566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
34359566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
34369566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
34379566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
34389566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3439d0f46423SBarry Smith   m = B->rmap->n / bs;
3440725b52f3SLisandro Dalcin 
34415f80ce2aSJacob Faibussowitsch   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
34429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &nnz));
3443725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3444cf12db73SBarry Smith     nz = ii[i + 1] - ii[i];
34455f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3446725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3447725b52f3SLisandro Dalcin     nnz[i] = nz;
3448725b52f3SLisandro Dalcin   }
34499566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
34509566063dSJacob Faibussowitsch   PetscCall(PetscFree(nnz));
3451725b52f3SLisandro Dalcin 
3452725b52f3SLisandro Dalcin   values = (PetscScalar *)V;
345348a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3454725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3455cf12db73SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
3456cf12db73SBarry Smith     const PetscInt *icols = jj + ii[i];
3457bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {
3458cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
34599566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
34603adadaf3SJed Brown     } else {
34613adadaf3SJed Brown       PetscInt j;
34623adadaf3SJed Brown       for (j = 0; j < ncols; j++) {
34633adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
34649566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
34653adadaf3SJed Brown       }
34663adadaf3SJed Brown     }
3467725b52f3SLisandro Dalcin   }
34689566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
34699566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
34709566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
34719566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
34723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3473725b52f3SLisandro Dalcin }
3474725b52f3SLisandro Dalcin 
/*@C
  MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored

  Not Collective

  Input Parameter:
. A - a `MATSEQBAIJ` matrix

  Output Parameter:
. array - pointer to the data

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
@*/
PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  /* forward to the implementation composed on A under the name "MatSeqBAIJGetArray_C" */
  PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3496cda14afcSprj- 
/*@C
  MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`

  Not Collective

  Input Parameters:
+ A     - a `MATSEQBAIJ` matrix
- array - pointer to the data

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
@*/
PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  /* forward to the implementation composed on A under the name "MatSeqBAIJRestoreArray_C" */
  PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3516cda14afcSprj- 
35170bad9183SKris Buschelman /*MC
3518fafad747SKris Buschelman    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
35190bad9183SKris Buschelman    block sparse compressed row format.
35200bad9183SKris Buschelman 
35210bad9183SKris Buschelman    Options Database Keys:
352220f4b53cSBarry Smith + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
35236679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
35240bad9183SKris Buschelman 
35250bad9183SKris Buschelman    Level: beginner
35260cd7f59aSBarry Smith 
35270cd7f59aSBarry Smith    Notes:
   `MatSetOption`(A,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
35300bad9183SKris Buschelman 
35312ef1f0ffSBarry Smith    Run with `-info` to see what version of the matrix-vector product is being used
35326679dcc1SBarry Smith 
35331cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
35340bad9183SKris Buschelman M*/
35350bad9183SKris Buschelman 
3536cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3537b24902e0SBarry Smith 
3538d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3539d71ae5a4SJacob Faibussowitsch {
3540c1ac3661SBarry Smith   PetscMPIInt  size;
3541b6490206SBarry Smith   Mat_SeqBAIJ *b;
35423b2fbd54SBarry Smith 
35433a40ed3dSBarry Smith   PetscFunctionBegin;
35449566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
35455f80ce2aSJacob Faibussowitsch   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3546b6490206SBarry Smith 
35474dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
3548b0a32e0cSBarry Smith   B->data   = (void *)b;
3549aea10558SJacob Faibussowitsch   B->ops[0] = MatOps_Values;
355026fbe8dcSKarl Rupp 
3551f4259b30SLisandro Dalcin   b->row          = NULL;
3552f4259b30SLisandro Dalcin   b->col          = NULL;
3553f4259b30SLisandro Dalcin   b->icol         = NULL;
35542593348eSBarry Smith   b->reallocs     = 0;
3555f4259b30SLisandro Dalcin   b->saved_values = NULL;
35562593348eSBarry Smith 
3557c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
35582593348eSBarry Smith   b->nonew              = 0;
3559f4259b30SLisandro Dalcin   b->diag               = NULL;
3560f4259b30SLisandro Dalcin   B->spptr              = NULL;
3561b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3562a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
35634e220ebcSLois Curfman McInnes 
35649566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
35659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
35669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
35679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
35689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
35699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
35709566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
35719566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
35729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
35739566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
35747ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
35759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
35767ea3e4caSstefano_zampini #endif
35779566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
35789566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
35793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
35802593348eSBarry Smith }
35812593348eSBarry Smith 
3582d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3583d71ae5a4SJacob Faibussowitsch {
3584b24902e0SBarry Smith   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3585a96a251dSBarry Smith   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3586de6a44a3SBarry Smith 
35873a40ed3dSBarry Smith   PetscFunctionBegin;
358831fe6a7dSBarry Smith   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
35895f80ce2aSJacob Faibussowitsch   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
35902593348eSBarry Smith 
35914fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35924fd072dbSBarry Smith     c->imax           = a->imax;
35934fd072dbSBarry Smith     c->ilen           = a->ilen;
35944fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
35954fd072dbSBarry Smith   } else {
35969566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3597b6490206SBarry Smith     for (i = 0; i < mbs; i++) {
35982593348eSBarry Smith       c->imax[i] = a->imax[i];
35992593348eSBarry Smith       c->ilen[i] = a->ilen[i];
36002593348eSBarry Smith     }
36014fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
36024fd072dbSBarry Smith   }
36032593348eSBarry Smith 
36042593348eSBarry Smith   /* allocate the matrix space */
360516a2bf60SHong Zhang   if (mallocmatspace) {
36064fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
36079f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
36089f0612e4SBarry Smith       PetscCall(PetscArrayzero(c->a, bs2 * nz));
36099f0612e4SBarry Smith       c->free_a       = PETSC_TRUE;
36104fd072dbSBarry Smith       c->i            = a->i;
36114fd072dbSBarry Smith       c->j            = a->j;
3612379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
36134fd072dbSBarry Smith       c->parent       = A;
36141e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
36151e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
361626fbe8dcSKarl Rupp 
36179566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)A));
36189566063dSJacob Faibussowitsch       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
36199566063dSJacob Faibussowitsch       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
36204fd072dbSBarry Smith     } else {
36219f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
36229f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
36239f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3624379be0ddSLisandro Dalcin       c->free_a  = PETSC_TRUE;
36254fd072dbSBarry Smith       c->free_ij = PETSC_TRUE;
362626fbe8dcSKarl Rupp 
36279566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3628b6490206SBarry Smith       if (mbs > 0) {
36299566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(c->j, a->j, nz));
36302e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
36319566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
36322e8a6d31SBarry Smith         } else {
36339566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(c->a, bs2 * nz));
36342593348eSBarry Smith         }
36352593348eSBarry Smith       }
36361e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
36371e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
363816a2bf60SHong Zhang     }
36394fd072dbSBarry Smith   }
364016a2bf60SHong Zhang 
36412593348eSBarry Smith   c->roworiented = a->roworiented;
36422593348eSBarry Smith   c->nonew       = a->nonew;
364326fbe8dcSKarl Rupp 
36449566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
36459566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
364626fbe8dcSKarl Rupp 
36475c9eb25fSBarry Smith   c->bs2 = a->bs2;
36485c9eb25fSBarry Smith   c->mbs = a->mbs;
36495c9eb25fSBarry Smith   c->nbs = a->nbs;
36502593348eSBarry Smith 
36512593348eSBarry Smith   if (a->diag) {
36524fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
36534fd072dbSBarry Smith       c->diag      = a->diag;
36544fd072dbSBarry Smith       c->free_diag = PETSC_FALSE;
36554fd072dbSBarry Smith     } else {
36569566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
365726fbe8dcSKarl Rupp       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
36584fd072dbSBarry Smith       c->free_diag = PETSC_TRUE;
36594fd072dbSBarry Smith     }
3660f4259b30SLisandro Dalcin   } else c->diag = NULL;
366126fbe8dcSKarl Rupp 
36622593348eSBarry Smith   c->nz         = a->nz;
3663f2cbd3d5SJed Brown   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3664f361c04dSBarry Smith   c->solve_work = NULL;
3665f361c04dSBarry Smith   c->mult_work  = NULL;
3666f361c04dSBarry Smith   c->sor_workt  = NULL;
3667f361c04dSBarry Smith   c->sor_work   = NULL;
366888e51ccdSHong Zhang 
366988e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
367088e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3671cd6b891eSBarry Smith   if (a->compressedrow.use) {
367288e51ccdSHong Zhang     i = a->compressedrow.nrows;
36739566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
36749566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
36759566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
367688e51ccdSHong Zhang   } else {
367788e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
36780298fd71SBarry Smith     c->compressedrow.i      = NULL;
36790298fd71SBarry Smith     c->compressedrow.rindex = NULL;
368088e51ccdSHong Zhang   }
3681c05f355bSMark Adams   c->nonzerorowcnt = a->nonzerorowcnt;
3682e56f5c9eSBarry Smith   C->nonzerostate  = A->nonzerostate;
368326fbe8dcSKarl Rupp 
36849566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
36853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
36862593348eSBarry Smith }
36872593348eSBarry Smith 
3688d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3689d71ae5a4SJacob Faibussowitsch {
3690b24902e0SBarry Smith   PetscFunctionBegin;
36919566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
36929566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
36939566063dSJacob Faibussowitsch   PetscCall(MatSetType(*B, MATSEQBAIJ));
36949566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
36953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3696b24902e0SBarry Smith }
3697b24902e0SBarry Smith 
3698618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3699d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3700d71ae5a4SJacob Faibussowitsch {
3701b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3702b51a4376SLisandro Dalcin   PetscInt    *rowidxs, *colidxs;
3703b51a4376SLisandro Dalcin   PetscScalar *matvals;
3704b51a4376SLisandro Dalcin 
3705b51a4376SLisandro Dalcin   PetscFunctionBegin;
37069566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
3707b51a4376SLisandro Dalcin 
3708b51a4376SLisandro Dalcin   /* read matrix header */
37099566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
37105f80ce2aSJacob Faibussowitsch   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
37119371c9d4SSatish Balay   M  = header[1];
37129371c9d4SSatish Balay   N  = header[2];
37139371c9d4SSatish Balay   nz = header[3];
37145f80ce2aSJacob Faibussowitsch   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
37155f80ce2aSJacob Faibussowitsch   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
37165f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3717b51a4376SLisandro Dalcin 
3718b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
37199566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3720b51a4376SLisandro Dalcin   /* set local and global sizes if not set already */
3721b51a4376SLisandro Dalcin   if (mat->rmap->n < 0) mat->rmap->n = M;
3722b51a4376SLisandro Dalcin   if (mat->cmap->n < 0) mat->cmap->n = N;
3723b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3724b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
37259566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
37269566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
3727b51a4376SLisandro Dalcin 
3728b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
37299566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &rows, &cols));
37305f80ce2aSJacob Faibussowitsch   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
37319566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
37329566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
37339371c9d4SSatish Balay   mbs = m / bs;
37349371c9d4SSatish Balay   nbs = n / bs;
3735b51a4376SLisandro Dalcin 
3736b51a4376SLisandro Dalcin   /* read in row lengths, column indices and nonzero values */
37379566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &rowidxs));
37389566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
37399371c9d4SSatish Balay   rowidxs[0] = 0;
37409371c9d4SSatish Balay   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3741b51a4376SLisandro Dalcin   sum = rowidxs[m];
37425f80ce2aSJacob Faibussowitsch   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3743b51a4376SLisandro Dalcin 
3744b51a4376SLisandro Dalcin   /* read in column indices and nonzero values */
37459566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
37469566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
37479566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3748b51a4376SLisandro Dalcin 
3749b51a4376SLisandro Dalcin   {               /* preallocate matrix storage */
3750b51a4376SLisandro Dalcin     PetscBT   bt; /* helper bit set to count nonzeros */
3751b51a4376SLisandro Dalcin     PetscInt *nnz;
3752618cc2edSLisandro Dalcin     PetscBool sbaij;
3753b51a4376SLisandro Dalcin 
37549566063dSJacob Faibussowitsch     PetscCall(PetscBTCreate(nbs, &bt));
37559566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(mbs, &nnz));
37569566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3757b51a4376SLisandro Dalcin     for (i = 0; i < mbs; i++) {
37589566063dSJacob Faibussowitsch       PetscCall(PetscBTMemzero(nbs, bt));
3759618cc2edSLisandro Dalcin       for (k = 0; k < bs; k++) {
3760618cc2edSLisandro Dalcin         PetscInt row = bs * i + k;
3761618cc2edSLisandro Dalcin         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3762618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3763618cc2edSLisandro Dalcin           if (!sbaij || col >= row)
3764618cc2edSLisandro Dalcin             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3765618cc2edSLisandro Dalcin         }
3766618cc2edSLisandro Dalcin       }
3767b51a4376SLisandro Dalcin     }
37689566063dSJacob Faibussowitsch     PetscCall(PetscBTDestroy(&bt));
37699566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
37709566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
37719566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
3772b51a4376SLisandro Dalcin   }
3773b51a4376SLisandro Dalcin 
3774b51a4376SLisandro Dalcin   /* store matrix values */
3775b51a4376SLisandro Dalcin   for (i = 0; i < m; i++) {
3776b51a4376SLisandro Dalcin     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
37779927e4dfSBarry Smith     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3778b51a4376SLisandro Dalcin   }
3779b51a4376SLisandro Dalcin 
37809566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
37819566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs, matvals));
37829566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
37839566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
37843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3785b51a4376SLisandro Dalcin }
3786b51a4376SLisandro Dalcin 
3787d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3788d71ae5a4SJacob Faibussowitsch {
37897f489da9SVaclav Hapla   PetscBool isbinary;
3790f501eaabSShri Abhyankar 
3791f501eaabSShri Abhyankar   PetscFunctionBegin;
37929566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
37935f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
37949566063dSJacob Faibussowitsch   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
37953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3796f501eaabSShri Abhyankar }
3797f501eaabSShri Abhyankar 
37985d83a8b1SBarry Smith /*@
379911a5261eSBarry Smith   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3800273d9f13SBarry Smith   compressed row) format.  For good matrix assembly performance the
380120f4b53cSBarry Smith   user should preallocate the matrix storage by setting the parameter `nz`
380220f4b53cSBarry Smith   (or the array `nnz`).
38032593348eSBarry Smith 
3804d083f849SBarry Smith   Collective
3805273d9f13SBarry Smith 
3806273d9f13SBarry Smith   Input Parameters:
380711a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
380811a5261eSBarry Smith . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
380911a5261eSBarry Smith          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3810273d9f13SBarry Smith . m    - number of rows
3811273d9f13SBarry Smith . n    - number of columns
381235d8aa7fSBarry Smith . nz   - number of nonzero blocks  per block row (same for all rows)
381335d8aa7fSBarry Smith - nnz  - array containing the number of nonzero blocks in the various block rows
381420f4b53cSBarry Smith          (possibly different for each block row) or `NULL`
3815273d9f13SBarry Smith 
3816273d9f13SBarry Smith   Output Parameter:
3817273d9f13SBarry Smith . A - the matrix
3818273d9f13SBarry Smith 
3819273d9f13SBarry Smith   Options Database Keys:
382011a5261eSBarry Smith + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3821a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
3822273d9f13SBarry Smith 
3823273d9f13SBarry Smith   Level: intermediate
3824273d9f13SBarry Smith 
3825273d9f13SBarry Smith   Notes:
382677433607SBarry Smith   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
38272ef1f0ffSBarry Smith   MatXXXXSetPreallocation() paradigm instead of this routine directly.
38282ef1f0ffSBarry Smith   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
38292ef1f0ffSBarry Smith 
3830d1be2dadSMatthew Knepley   The number of rows and columns must be divisible by blocksize.
3831d1be2dadSMatthew Knepley 
38322ef1f0ffSBarry Smith   If the `nnz` parameter is given then the `nz` parameter is ignored
383349a6f317SBarry Smith 
383435d8aa7fSBarry Smith   A nonzero block is any block that as 1 or more nonzeros in it
383535d8aa7fSBarry Smith 
38362ef1f0ffSBarry Smith   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3837273d9f13SBarry Smith   storage.  That is, the stored row and column indices can begin at
383820f4b53cSBarry Smith   either one (as in Fortran) or zero.
3839273d9f13SBarry Smith 
38402ef1f0ffSBarry Smith   Specify the preallocated storage with either `nz` or `nnz` (not both).
38412ef1f0ffSBarry Smith   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3842651615e1SBarry Smith   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3843273d9f13SBarry Smith   matrices.
3844273d9f13SBarry Smith 
38451cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3846273d9f13SBarry Smith @*/
3847d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3848d71ae5a4SJacob Faibussowitsch {
3849273d9f13SBarry Smith   PetscFunctionBegin;
38509566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
38519566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
38529566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJ));
38539566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
38543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3855273d9f13SBarry Smith }
3856273d9f13SBarry Smith 
38575d83a8b1SBarry Smith /*@
3858273d9f13SBarry Smith   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3859273d9f13SBarry Smith   per row in the matrix. For good matrix assembly performance the
386020f4b53cSBarry Smith   user should preallocate the matrix storage by setting the parameter `nz`
386120f4b53cSBarry Smith   (or the array `nnz`).
3862273d9f13SBarry Smith 
3863d083f849SBarry Smith   Collective
3864273d9f13SBarry Smith 
3865273d9f13SBarry Smith   Input Parameters:
38661c4f3114SJed Brown + B   - the matrix
386711a5261eSBarry Smith . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
386811a5261eSBarry Smith         blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3869273d9f13SBarry Smith . nz  - number of block nonzeros per block row (same for all rows)
3870273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows
38712ef1f0ffSBarry Smith         (possibly different for each block row) or `NULL`
3872273d9f13SBarry Smith 
3873273d9f13SBarry Smith   Options Database Keys:
387411a5261eSBarry Smith + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3875a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
3876273d9f13SBarry Smith 
3877273d9f13SBarry Smith   Level: intermediate
3878273d9f13SBarry Smith 
3879273d9f13SBarry Smith   Notes:
38802ef1f0ffSBarry Smith   If the `nnz` parameter is given then the `nz` parameter is ignored
388149a6f317SBarry Smith 
388211a5261eSBarry Smith   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3883aa95bbe8SBarry Smith   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
388420f4b53cSBarry Smith   You can also run with the option `-info` and look for messages with the string
3885aa95bbe8SBarry Smith   malloc in them to see if additional memory allocation was needed.
3886aa95bbe8SBarry Smith 
38872ef1f0ffSBarry Smith   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3888273d9f13SBarry Smith   storage.  That is, the stored row and column indices can begin at
388920f4b53cSBarry Smith   either one (as in Fortran) or zero.
3890273d9f13SBarry Smith 
3891d8a51d2aSBarry Smith   Specify the preallocated storage with either `nz` or `nnz` (not both).
38922ef1f0ffSBarry Smith   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3893651615e1SBarry Smith   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3894273d9f13SBarry Smith 
38951cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3896273d9f13SBarry Smith @*/
3897d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3898d71ae5a4SJacob Faibussowitsch {
3899273d9f13SBarry Smith   PetscFunctionBegin;
39006ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
39016ba663aaSJed Brown   PetscValidType(B, 1);
39026ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3903cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
39043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3905273d9f13SBarry Smith }
3906a1d92eedSBarry Smith 
3907725b52f3SLisandro Dalcin /*@C
390811a5261eSBarry Smith   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3909725b52f3SLisandro Dalcin 
3910d083f849SBarry Smith   Collective
3911725b52f3SLisandro Dalcin 
3912725b52f3SLisandro Dalcin   Input Parameters:
39131c4f3114SJed Brown + B  - the matrix
391420f4b53cSBarry Smith . bs - the blocksize
3915d8a51d2aSBarry Smith . i  - the indices into `j` for the start of each local row (indices start with zero)
3916d8a51d2aSBarry Smith . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
3917d8a51d2aSBarry Smith - v  - optional values in the matrix, use `NULL` if not provided
3918725b52f3SLisandro Dalcin 
3919664954b6SBarry Smith   Level: advanced
3920725b52f3SLisandro Dalcin 
39213adadaf3SJed Brown   Notes:
3922d8a51d2aSBarry Smith   The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3923d8a51d2aSBarry Smith 
392411a5261eSBarry Smith   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
392511a5261eSBarry Smith   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
39263adadaf3SJed Brown   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
392711a5261eSBarry Smith   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
39283adadaf3SJed Brown   block column and the second index is over columns within a block.
39293adadaf3SJed Brown 
3930664954b6SBarry Smith   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3931664954b6SBarry Smith 
39321cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3933725b52f3SLisandro Dalcin @*/
3934d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3935d71ae5a4SJacob Faibussowitsch {
3936725b52f3SLisandro Dalcin   PetscFunctionBegin;
39376ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
39386ba663aaSJed Brown   PetscValidType(B, 1);
39396ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3940cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
39413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3942725b52f3SLisandro Dalcin }
3943725b52f3SLisandro Dalcin 
3944c75a6043SHong Zhang /*@
394511a5261eSBarry Smith   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3946c75a6043SHong Zhang 
3947d083f849SBarry Smith   Collective
3948c75a6043SHong Zhang 
3949c75a6043SHong Zhang   Input Parameters:
3950c75a6043SHong Zhang + comm - must be an MPI communicator of size 1
3951c75a6043SHong Zhang . bs   - size of block
3952c75a6043SHong Zhang . m    - number of rows
3953c75a6043SHong Zhang . n    - number of columns
3954483a2f95SBarry Smith . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3955c75a6043SHong Zhang . j    - column indices
3956c75a6043SHong Zhang - a    - matrix values
3957c75a6043SHong Zhang 
3958c75a6043SHong Zhang   Output Parameter:
3959c75a6043SHong Zhang . mat - the matrix
3960c75a6043SHong Zhang 
3961dfb205c3SBarry Smith   Level: advanced
3962c75a6043SHong Zhang 
3963c75a6043SHong Zhang   Notes:
39642ef1f0ffSBarry Smith   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3965c75a6043SHong Zhang   once the matrix is destroyed
3966c75a6043SHong Zhang 
3967c75a6043SHong Zhang   You cannot set new nonzero locations into this matrix, that will generate an error.
3968c75a6043SHong Zhang 
39692ef1f0ffSBarry Smith   The `i` and `j` indices are 0 based
3970c75a6043SHong Zhang 
397111a5261eSBarry Smith   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3972dfb205c3SBarry Smith 
39733adadaf3SJed Brown   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
39743adadaf3SJed Brown   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
39753adadaf3SJed Brown   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
39763adadaf3SJed Brown   with column-major ordering within blocks.
3977dfb205c3SBarry Smith 
39781cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3979c75a6043SHong Zhang @*/
3980d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3981d71ae5a4SJacob Faibussowitsch {
3982c75a6043SHong Zhang   Mat_SeqBAIJ *baij;
3983c75a6043SHong Zhang 
3984c75a6043SHong Zhang   PetscFunctionBegin;
39855f80ce2aSJacob Faibussowitsch   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
39865f80ce2aSJacob Faibussowitsch   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3987c75a6043SHong Zhang 
39889566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
39899566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, m, n));
39909566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATSEQBAIJ));
39919566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3992c75a6043SHong Zhang   baij = (Mat_SeqBAIJ *)(*mat)->data;
39939566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3994c75a6043SHong Zhang 
3995c75a6043SHong Zhang   baij->i = i;
3996c75a6043SHong Zhang   baij->j = j;
3997c75a6043SHong Zhang   baij->a = a;
399826fbe8dcSKarl Rupp 
3999c75a6043SHong Zhang   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
4000e6b907acSBarry Smith   baij->free_a         = PETSC_FALSE;
4001e6b907acSBarry Smith   baij->free_ij        = PETSC_FALSE;
4002ceb5bf51SJacob Faibussowitsch   baij->free_imax_ilen = PETSC_TRUE;
4003c75a6043SHong Zhang 
4004ceb5bf51SJacob Faibussowitsch   for (PetscInt ii = 0; ii < m; ii++) {
4005ceb5bf51SJacob Faibussowitsch     const PetscInt row_len = i[ii + 1] - i[ii];
4006ceb5bf51SJacob Faibussowitsch 
4007ceb5bf51SJacob Faibussowitsch     baij->ilen[ii] = baij->imax[ii] = row_len;
4008ceb5bf51SJacob Faibussowitsch     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
4009c75a6043SHong Zhang   }
401076bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
4011ceb5bf51SJacob Faibussowitsch     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
40126bdcaf15SBarry Smith       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
40136bdcaf15SBarry Smith       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
4014c75a6043SHong Zhang     }
401576bd3646SJed Brown   }
4016c75a6043SHong Zhang 
40179566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
40189566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
40193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4020c75a6043SHong Zhang }
4021bdf6f3fcSHong Zhang 
4022d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4023d71ae5a4SJacob Faibussowitsch {
4024bdf6f3fcSHong Zhang   PetscFunctionBegin;
40259566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
40263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4027bdf6f3fcSHong Zhang }
4028