xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 48a46eb9bd028bec07ec0f396b1a3abb43f14558)
1be1d678aSKris Buschelman 
/*
    Defines the basic matrix operations for the BAIJ (block compressed row)
  matrix storage format.
*/
6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
7c6db04a5SJed Brown #include <petscblaslapack.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
1043516a2dSKris Buschelman 
117ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
127ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
137ea3e4caSstefano_zampini #endif
147ea3e4caSstefano_zampini 
15b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
17b5b72c8aSIrina Sokolova #endif
18c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
19b5b72c8aSIrina Sokolova 
209371c9d4SSatish Balay PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) {
219463ebdaSPierre Jolivet   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
22857cbf51SRichard Tran Mills   PetscInt     m, n, i;
239463ebdaSPierre Jolivet   PetscInt     ib, jb, bs = A->rmap->bs;
249463ebdaSPierre Jolivet   MatScalar   *a_val = a_aij->a;
259463ebdaSPierre Jolivet 
269463ebdaSPierre Jolivet   PetscFunctionBegin;
279566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
28857cbf51SRichard Tran Mills   for (i = 0; i < n; i++) reductions[i] = 0.0;
299463ebdaSPierre Jolivet   if (type == NORM_2) {
309463ebdaSPierre Jolivet     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
319463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
329463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
33857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
349463ebdaSPierre Jolivet           a_val++;
359463ebdaSPierre Jolivet         }
369463ebdaSPierre Jolivet       }
379463ebdaSPierre Jolivet     }
389463ebdaSPierre Jolivet   } else if (type == NORM_1) {
399463ebdaSPierre Jolivet     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
409463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
419463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
42857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
439463ebdaSPierre Jolivet           a_val++;
449463ebdaSPierre Jolivet         }
459463ebdaSPierre Jolivet       }
469463ebdaSPierre Jolivet     }
479463ebdaSPierre Jolivet   } else if (type == NORM_INFINITY) {
489463ebdaSPierre Jolivet     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
499463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
509463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
519463ebdaSPierre Jolivet           int col         = A->cmap->rstart + a_aij->j[i] * bs + jb;
52857cbf51SRichard Tran Mills           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
539463ebdaSPierre Jolivet           a_val++;
549463ebdaSPierre Jolivet         }
559463ebdaSPierre Jolivet       }
569463ebdaSPierre Jolivet     }
57857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
58857cbf51SRichard Tran Mills     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
59857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
60857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
61857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
62857cbf51SRichard Tran Mills           a_val++;
63857cbf51SRichard Tran Mills         }
64857cbf51SRichard Tran Mills       }
65857cbf51SRichard Tran Mills     }
66857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
67857cbf51SRichard Tran Mills     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
68857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
69857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
70857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
71857cbf51SRichard Tran Mills           a_val++;
72857cbf51SRichard Tran Mills         }
73857cbf51SRichard Tran Mills       }
74857cbf51SRichard Tran Mills     }
75857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
769463ebdaSPierre Jolivet   if (type == NORM_2) {
77857cbf51SRichard Tran Mills     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
78857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79857cbf51SRichard Tran Mills     for (i = 0; i < n; i++) reductions[i] /= m;
809463ebdaSPierre Jolivet   }
819463ebdaSPierre Jolivet   PetscFunctionReturn(0);
829463ebdaSPierre Jolivet }
839463ebdaSPierre Jolivet 
849371c9d4SSatish Balay PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) {
85b01c7715SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
86de80f912SBarry Smith   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
877f0c90edSBarry Smith   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
8862bba022SBarry Smith   PetscReal    shift = 0.0;
891a9391e3SHong Zhang   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
90b01c7715SBarry Smith 
91b01c7715SBarry Smith   PetscFunctionBegin;
92a455e926SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
93a455e926SHong Zhang 
949797317bSBarry Smith   if (a->idiagvalid) {
959797317bSBarry Smith     if (values) *values = a->idiag;
969797317bSBarry Smith     PetscFunctionReturn(0);
979797317bSBarry Smith   }
989566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
99b01c7715SBarry Smith   diag_offset = a->diag;
100b01c7715SBarry Smith   if (!a->idiag) {
1019566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
1029566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)A, bs2 * mbs * sizeof(PetscScalar)));
103b01c7715SBarry Smith   }
104b01c7715SBarry Smith   diag = a->idiag;
105bbead8a2SBarry Smith   if (values) *values = a->idiag;
106b01c7715SBarry Smith   /* factor and invert each block */
107521d7252SBarry Smith   switch (bs) {
108ab040260SJed Brown   case 1:
109ab040260SJed Brown     for (i = 0; i < mbs; i++) {
110ab040260SJed Brown       odiag   = v + 1 * diag_offset[i];
111ab040260SJed Brown       diag[0] = odiag[0];
112ec1892c8SHong Zhang 
113ec1892c8SHong Zhang       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
114ec1892c8SHong Zhang         if (allowzeropivot) {
1157b6c816cSBarry Smith           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1167b6c816cSBarry Smith           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
1177b6c816cSBarry Smith           A->factorerror_zeropivot_row   = i;
1189566063dSJacob Faibussowitsch           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
11998921bdaSJacob Faibussowitsch         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
120ec1892c8SHong Zhang       }
121ec1892c8SHong Zhang 
122d4a378daSJed Brown       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
123ab040260SJed Brown       diag += 1;
124ab040260SJed Brown     }
125ab040260SJed Brown     break;
126b01c7715SBarry Smith   case 2:
127b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
128b01c7715SBarry Smith       odiag   = v + 4 * diag_offset[i];
1299371c9d4SSatish Balay       diag[0] = odiag[0];
1309371c9d4SSatish Balay       diag[1] = odiag[1];
1319371c9d4SSatish Balay       diag[2] = odiag[2];
1329371c9d4SSatish Balay       diag[3] = odiag[3];
1339566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1347b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
135b01c7715SBarry Smith       diag += 4;
136b01c7715SBarry Smith     }
137b01c7715SBarry Smith     break;
138b01c7715SBarry Smith   case 3:
139b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
140b01c7715SBarry Smith       odiag   = v + 9 * diag_offset[i];
1419371c9d4SSatish Balay       diag[0] = odiag[0];
1429371c9d4SSatish Balay       diag[1] = odiag[1];
1439371c9d4SSatish Balay       diag[2] = odiag[2];
1449371c9d4SSatish Balay       diag[3] = odiag[3];
1459371c9d4SSatish Balay       diag[4] = odiag[4];
1469371c9d4SSatish Balay       diag[5] = odiag[5];
1479371c9d4SSatish Balay       diag[6] = odiag[6];
1489371c9d4SSatish Balay       diag[7] = odiag[7];
149b01c7715SBarry Smith       diag[8] = odiag[8];
1509566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
1517b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
152b01c7715SBarry Smith       diag += 9;
153b01c7715SBarry Smith     }
154b01c7715SBarry Smith     break;
155b01c7715SBarry Smith   case 4:
156b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
157b01c7715SBarry Smith       odiag = v + 16 * diag_offset[i];
1589566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 16));
1599566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
1607b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
161b01c7715SBarry Smith       diag += 16;
162b01c7715SBarry Smith     }
163b01c7715SBarry Smith     break;
164b01c7715SBarry Smith   case 5:
165b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
166b01c7715SBarry Smith       odiag = v + 25 * diag_offset[i];
1679566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 25));
1689566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
1697b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
170b01c7715SBarry Smith       diag += 25;
171b01c7715SBarry Smith     }
172b01c7715SBarry Smith     break;
173d49b2adcSBarry Smith   case 6:
174d49b2adcSBarry Smith     for (i = 0; i < mbs; i++) {
175d49b2adcSBarry Smith       odiag = v + 36 * diag_offset[i];
1769566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 36));
1779566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
1787b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
179d49b2adcSBarry Smith       diag += 36;
180d49b2adcSBarry Smith     }
181d49b2adcSBarry Smith     break;
182de80f912SBarry Smith   case 7:
183de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
184de80f912SBarry Smith       odiag = v + 49 * diag_offset[i];
1859566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 49));
1869566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
1877b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
188de80f912SBarry Smith       diag += 49;
189de80f912SBarry Smith     }
190de80f912SBarry Smith     break;
191b01c7715SBarry Smith   default:
1929566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
193de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
194de80f912SBarry Smith       odiag = v + bs2 * diag_offset[i];
1959566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, bs2));
1969566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
1977b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
198de80f912SBarry Smith       diag += bs2;
199de80f912SBarry Smith     }
2009566063dSJacob Faibussowitsch     PetscCall(PetscFree2(v_work, v_pivots));
201b01c7715SBarry Smith   }
202b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
203b01c7715SBarry Smith   PetscFunctionReturn(0);
204b01c7715SBarry Smith }
205b01c7715SBarry Smith 
2069371c9d4SSatish Balay PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
2076d3beeddSMatthew Knepley   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
208e48d15efSToby Isaac   PetscScalar       *x, *work, *w, *workt, *t;
209e48d15efSToby Isaac   const MatScalar   *v, *aa = a->a, *idiag;
210e48d15efSToby Isaac   const PetscScalar *b, *xb;
2115455b99fSToby Isaac   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
212e48d15efSToby Isaac   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
213c1ac3661SBarry Smith   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
214b01c7715SBarry Smith 
215b01c7715SBarry Smith   PetscFunctionBegin;
216b01c7715SBarry Smith   its = its * lits;
2175f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
2185f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2195f80ce2aSJacob Faibussowitsch   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
2205f80ce2aSJacob Faibussowitsch   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
2215f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
222b01c7715SBarry Smith 
2239566063dSJacob Faibussowitsch   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
224b01c7715SBarry Smith 
225b2ec919aSToby Isaac   if (!m) PetscFunctionReturn(0);
226b01c7715SBarry Smith   diag  = a->diag;
227b01c7715SBarry Smith   idiag = a->idiag;
228de80f912SBarry Smith   k     = PetscMax(A->rmap->n, A->cmap->n);
229*48a46eb9SPierre Jolivet   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
230*48a46eb9SPierre Jolivet   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
231*48a46eb9SPierre Jolivet   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
2323475c22fSBarry Smith   work = a->mult_work;
2333475c22fSBarry Smith   t    = a->sor_workt;
234de80f912SBarry Smith   w    = a->sor_work;
235de80f912SBarry Smith 
2369566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
2379566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
238de80f912SBarry Smith 
239de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
240de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
241e48d15efSToby Isaac       switch (bs) {
242e48d15efSToby Isaac       case 1:
243e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
244e48d15efSToby Isaac         t[0] = b[0];
245e48d15efSToby Isaac         i2   = 1;
246e48d15efSToby Isaac         idiag += 1;
247e48d15efSToby Isaac         for (i = 1; i < m; i++) {
248e48d15efSToby Isaac           v    = aa + ai[i];
249e48d15efSToby Isaac           vi   = aj + ai[i];
250e48d15efSToby Isaac           nz   = diag[i] - ai[i];
251e48d15efSToby Isaac           s[0] = b[i2];
252e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
253e48d15efSToby Isaac             xw[0] = x[vi[j]];
254e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
255e48d15efSToby Isaac           }
256e48d15efSToby Isaac           t[i2] = s[0];
257e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
258e48d15efSToby Isaac           x[i2] = xw[0];
259e48d15efSToby Isaac           idiag += 1;
260e48d15efSToby Isaac           i2 += 1;
261e48d15efSToby Isaac         }
262e48d15efSToby Isaac         break;
263e48d15efSToby Isaac       case 2:
264e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
2659371c9d4SSatish Balay         t[0] = b[0];
2669371c9d4SSatish Balay         t[1] = b[1];
267e48d15efSToby Isaac         i2   = 2;
268e48d15efSToby Isaac         idiag += 4;
269e48d15efSToby Isaac         for (i = 1; i < m; i++) {
270e48d15efSToby Isaac           v    = aa + 4 * ai[i];
271e48d15efSToby Isaac           vi   = aj + ai[i];
272e48d15efSToby Isaac           nz   = diag[i] - ai[i];
2739371c9d4SSatish Balay           s[0] = b[i2];
2749371c9d4SSatish Balay           s[1] = b[i2 + 1];
275e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
276e48d15efSToby Isaac             idx   = 2 * vi[j];
277e48d15efSToby Isaac             it    = 4 * j;
2789371c9d4SSatish Balay             xw[0] = x[idx];
2799371c9d4SSatish Balay             xw[1] = x[1 + idx];
280e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
281e48d15efSToby Isaac           }
2829371c9d4SSatish Balay           t[i2]     = s[0];
2839371c9d4SSatish Balay           t[i2 + 1] = s[1];
284e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
2859371c9d4SSatish Balay           x[i2]     = xw[0];
2869371c9d4SSatish Balay           x[i2 + 1] = xw[1];
287e48d15efSToby Isaac           idiag += 4;
288e48d15efSToby Isaac           i2 += 2;
289e48d15efSToby Isaac         }
290e48d15efSToby Isaac         break;
291e48d15efSToby Isaac       case 3:
292e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
2939371c9d4SSatish Balay         t[0] = b[0];
2949371c9d4SSatish Balay         t[1] = b[1];
2959371c9d4SSatish Balay         t[2] = b[2];
296e48d15efSToby Isaac         i2   = 3;
297e48d15efSToby Isaac         idiag += 9;
298e48d15efSToby Isaac         for (i = 1; i < m; i++) {
299e48d15efSToby Isaac           v    = aa + 9 * ai[i];
300e48d15efSToby Isaac           vi   = aj + ai[i];
301e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3029371c9d4SSatish Balay           s[0] = b[i2];
3039371c9d4SSatish Balay           s[1] = b[i2 + 1];
3049371c9d4SSatish Balay           s[2] = b[i2 + 2];
305e48d15efSToby Isaac           while (nz--) {
306e48d15efSToby Isaac             idx   = 3 * (*vi++);
3079371c9d4SSatish Balay             xw[0] = x[idx];
3089371c9d4SSatish Balay             xw[1] = x[1 + idx];
3099371c9d4SSatish Balay             xw[2] = x[2 + idx];
310e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
311e48d15efSToby Isaac             v += 9;
312e48d15efSToby Isaac           }
3139371c9d4SSatish Balay           t[i2]     = s[0];
3149371c9d4SSatish Balay           t[i2 + 1] = s[1];
3159371c9d4SSatish Balay           t[i2 + 2] = s[2];
316e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
3179371c9d4SSatish Balay           x[i2]     = xw[0];
3189371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3199371c9d4SSatish Balay           x[i2 + 2] = xw[2];
320e48d15efSToby Isaac           idiag += 9;
321e48d15efSToby Isaac           i2 += 3;
322e48d15efSToby Isaac         }
323e48d15efSToby Isaac         break;
324e48d15efSToby Isaac       case 4:
325e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
3269371c9d4SSatish Balay         t[0] = b[0];
3279371c9d4SSatish Balay         t[1] = b[1];
3289371c9d4SSatish Balay         t[2] = b[2];
3299371c9d4SSatish Balay         t[3] = b[3];
330e48d15efSToby Isaac         i2   = 4;
331e48d15efSToby Isaac         idiag += 16;
332e48d15efSToby Isaac         for (i = 1; i < m; i++) {
333e48d15efSToby Isaac           v    = aa + 16 * ai[i];
334e48d15efSToby Isaac           vi   = aj + ai[i];
335e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3369371c9d4SSatish Balay           s[0] = b[i2];
3379371c9d4SSatish Balay           s[1] = b[i2 + 1];
3389371c9d4SSatish Balay           s[2] = b[i2 + 2];
3399371c9d4SSatish Balay           s[3] = b[i2 + 3];
340e48d15efSToby Isaac           while (nz--) {
341e48d15efSToby Isaac             idx   = 4 * (*vi++);
3429371c9d4SSatish Balay             xw[0] = x[idx];
3439371c9d4SSatish Balay             xw[1] = x[1 + idx];
3449371c9d4SSatish Balay             xw[2] = x[2 + idx];
3459371c9d4SSatish Balay             xw[3] = x[3 + idx];
346e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
347e48d15efSToby Isaac             v += 16;
348e48d15efSToby Isaac           }
3499371c9d4SSatish Balay           t[i2]     = s[0];
3509371c9d4SSatish Balay           t[i2 + 1] = s[1];
3519371c9d4SSatish Balay           t[i2 + 2] = s[2];
3529371c9d4SSatish Balay           t[i2 + 3] = s[3];
353e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
3549371c9d4SSatish Balay           x[i2]     = xw[0];
3559371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3569371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3579371c9d4SSatish Balay           x[i2 + 3] = xw[3];
358e48d15efSToby Isaac           idiag += 16;
359e48d15efSToby Isaac           i2 += 4;
360e48d15efSToby Isaac         }
361e48d15efSToby Isaac         break;
362e48d15efSToby Isaac       case 5:
363e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
3649371c9d4SSatish Balay         t[0] = b[0];
3659371c9d4SSatish Balay         t[1] = b[1];
3669371c9d4SSatish Balay         t[2] = b[2];
3679371c9d4SSatish Balay         t[3] = b[3];
3689371c9d4SSatish Balay         t[4] = b[4];
369e48d15efSToby Isaac         i2   = 5;
370e48d15efSToby Isaac         idiag += 25;
371e48d15efSToby Isaac         for (i = 1; i < m; i++) {
372e48d15efSToby Isaac           v    = aa + 25 * ai[i];
373e48d15efSToby Isaac           vi   = aj + ai[i];
374e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3759371c9d4SSatish Balay           s[0] = b[i2];
3769371c9d4SSatish Balay           s[1] = b[i2 + 1];
3779371c9d4SSatish Balay           s[2] = b[i2 + 2];
3789371c9d4SSatish Balay           s[3] = b[i2 + 3];
3799371c9d4SSatish Balay           s[4] = b[i2 + 4];
380e48d15efSToby Isaac           while (nz--) {
381e48d15efSToby Isaac             idx   = 5 * (*vi++);
3829371c9d4SSatish Balay             xw[0] = x[idx];
3839371c9d4SSatish Balay             xw[1] = x[1 + idx];
3849371c9d4SSatish Balay             xw[2] = x[2 + idx];
3859371c9d4SSatish Balay             xw[3] = x[3 + idx];
3869371c9d4SSatish Balay             xw[4] = x[4 + idx];
387e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
388e48d15efSToby Isaac             v += 25;
389e48d15efSToby Isaac           }
3909371c9d4SSatish Balay           t[i2]     = s[0];
3919371c9d4SSatish Balay           t[i2 + 1] = s[1];
3929371c9d4SSatish Balay           t[i2 + 2] = s[2];
3939371c9d4SSatish Balay           t[i2 + 3] = s[3];
3949371c9d4SSatish Balay           t[i2 + 4] = s[4];
395e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
3969371c9d4SSatish Balay           x[i2]     = xw[0];
3979371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3989371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3999371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4009371c9d4SSatish Balay           x[i2 + 4] = xw[4];
401e48d15efSToby Isaac           idiag += 25;
402e48d15efSToby Isaac           i2 += 5;
403e48d15efSToby Isaac         }
404e48d15efSToby Isaac         break;
405e48d15efSToby Isaac       case 6:
406e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
4079371c9d4SSatish Balay         t[0] = b[0];
4089371c9d4SSatish Balay         t[1] = b[1];
4099371c9d4SSatish Balay         t[2] = b[2];
4109371c9d4SSatish Balay         t[3] = b[3];
4119371c9d4SSatish Balay         t[4] = b[4];
4129371c9d4SSatish Balay         t[5] = b[5];
413e48d15efSToby Isaac         i2   = 6;
414e48d15efSToby Isaac         idiag += 36;
415e48d15efSToby Isaac         for (i = 1; i < m; i++) {
416e48d15efSToby Isaac           v    = aa + 36 * ai[i];
417e48d15efSToby Isaac           vi   = aj + ai[i];
418e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4199371c9d4SSatish Balay           s[0] = b[i2];
4209371c9d4SSatish Balay           s[1] = b[i2 + 1];
4219371c9d4SSatish Balay           s[2] = b[i2 + 2];
4229371c9d4SSatish Balay           s[3] = b[i2 + 3];
4239371c9d4SSatish Balay           s[4] = b[i2 + 4];
4249371c9d4SSatish Balay           s[5] = b[i2 + 5];
425e48d15efSToby Isaac           while (nz--) {
426e48d15efSToby Isaac             idx   = 6 * (*vi++);
4279371c9d4SSatish Balay             xw[0] = x[idx];
4289371c9d4SSatish Balay             xw[1] = x[1 + idx];
4299371c9d4SSatish Balay             xw[2] = x[2 + idx];
4309371c9d4SSatish Balay             xw[3] = x[3 + idx];
4319371c9d4SSatish Balay             xw[4] = x[4 + idx];
4329371c9d4SSatish Balay             xw[5] = x[5 + idx];
433e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
434e48d15efSToby Isaac             v += 36;
435e48d15efSToby Isaac           }
4369371c9d4SSatish Balay           t[i2]     = s[0];
4379371c9d4SSatish Balay           t[i2 + 1] = s[1];
4389371c9d4SSatish Balay           t[i2 + 2] = s[2];
4399371c9d4SSatish Balay           t[i2 + 3] = s[3];
4409371c9d4SSatish Balay           t[i2 + 4] = s[4];
4419371c9d4SSatish Balay           t[i2 + 5] = s[5];
442e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
4439371c9d4SSatish Balay           x[i2]     = xw[0];
4449371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4459371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4469371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4479371c9d4SSatish Balay           x[i2 + 4] = xw[4];
4489371c9d4SSatish Balay           x[i2 + 5] = xw[5];
449e48d15efSToby Isaac           idiag += 36;
450e48d15efSToby Isaac           i2 += 6;
451e48d15efSToby Isaac         }
452e48d15efSToby Isaac         break;
453e48d15efSToby Isaac       case 7:
454e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
4559371c9d4SSatish Balay         t[0] = b[0];
4569371c9d4SSatish Balay         t[1] = b[1];
4579371c9d4SSatish Balay         t[2] = b[2];
4589371c9d4SSatish Balay         t[3] = b[3];
4599371c9d4SSatish Balay         t[4] = b[4];
4609371c9d4SSatish Balay         t[5] = b[5];
4619371c9d4SSatish Balay         t[6] = b[6];
462e48d15efSToby Isaac         i2   = 7;
463e48d15efSToby Isaac         idiag += 49;
464e48d15efSToby Isaac         for (i = 1; i < m; i++) {
465e48d15efSToby Isaac           v    = aa + 49 * ai[i];
466e48d15efSToby Isaac           vi   = aj + ai[i];
467e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4689371c9d4SSatish Balay           s[0] = b[i2];
4699371c9d4SSatish Balay           s[1] = b[i2 + 1];
4709371c9d4SSatish Balay           s[2] = b[i2 + 2];
4719371c9d4SSatish Balay           s[3] = b[i2 + 3];
4729371c9d4SSatish Balay           s[4] = b[i2 + 4];
4739371c9d4SSatish Balay           s[5] = b[i2 + 5];
4749371c9d4SSatish Balay           s[6] = b[i2 + 6];
475e48d15efSToby Isaac           while (nz--) {
476e48d15efSToby Isaac             idx   = 7 * (*vi++);
4779371c9d4SSatish Balay             xw[0] = x[idx];
4789371c9d4SSatish Balay             xw[1] = x[1 + idx];
4799371c9d4SSatish Balay             xw[2] = x[2 + idx];
4809371c9d4SSatish Balay             xw[3] = x[3 + idx];
4819371c9d4SSatish Balay             xw[4] = x[4 + idx];
4829371c9d4SSatish Balay             xw[5] = x[5 + idx];
4839371c9d4SSatish Balay             xw[6] = x[6 + idx];
484e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
485e48d15efSToby Isaac             v += 49;
486e48d15efSToby Isaac           }
4879371c9d4SSatish Balay           t[i2]     = s[0];
4889371c9d4SSatish Balay           t[i2 + 1] = s[1];
4899371c9d4SSatish Balay           t[i2 + 2] = s[2];
4909371c9d4SSatish Balay           t[i2 + 3] = s[3];
4919371c9d4SSatish Balay           t[i2 + 4] = s[4];
4929371c9d4SSatish Balay           t[i2 + 5] = s[5];
4939371c9d4SSatish Balay           t[i2 + 6] = s[6];
494e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
4959371c9d4SSatish Balay           x[i2]     = xw[0];
4969371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4979371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4989371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4999371c9d4SSatish Balay           x[i2 + 4] = xw[4];
5009371c9d4SSatish Balay           x[i2 + 5] = xw[5];
5019371c9d4SSatish Balay           x[i2 + 6] = xw[6];
502e48d15efSToby Isaac           idiag += 49;
503e48d15efSToby Isaac           i2 += 7;
504e48d15efSToby Isaac         }
505e48d15efSToby Isaac         break;
506e48d15efSToby Isaac       default:
50796b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
5089566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(t, b, bs));
509de80f912SBarry Smith         i2 = bs;
510de80f912SBarry Smith         idiag += bs2;
511de80f912SBarry Smith         for (i = 1; i < m; i++) {
512de80f912SBarry Smith           v  = aa + bs2 * ai[i];
513de80f912SBarry Smith           vi = aj + ai[i];
514de80f912SBarry Smith           nz = diag[i] - ai[i];
515de80f912SBarry Smith 
5169566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
517de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
518de80f912SBarry Smith           workt = work;
519de80f912SBarry Smith           for (j = 0; j < nz; j++) {
5209566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
521de80f912SBarry Smith             workt += bs;
522de80f912SBarry Smith           }
52396b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
5249566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(t + i2, w, bs));
52596b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
526de80f912SBarry Smith 
527de80f912SBarry Smith           idiag += bs2;
528de80f912SBarry Smith           i2 += bs;
529de80f912SBarry Smith         }
530e48d15efSToby Isaac         break;
531e48d15efSToby Isaac       }
532de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
5339566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
534e48d15efSToby Isaac       xb = t;
5359371c9d4SSatish Balay     } else xb = b;
536de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
537e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
538e48d15efSToby Isaac       i2    = bs * (m - 1);
539e48d15efSToby Isaac       switch (bs) {
540e48d15efSToby Isaac       case 1:
541e48d15efSToby Isaac         s[0] = xb[i2];
542e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
543e48d15efSToby Isaac         x[i2] = xw[0];
544e48d15efSToby Isaac         i2 -= 1;
545e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
546e48d15efSToby Isaac           v    = aa + (diag[i] + 1);
547e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
548e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
549e48d15efSToby Isaac           s[0] = xb[i2];
550e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
551e48d15efSToby Isaac             xw[0] = x[vi[j]];
552e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
553e48d15efSToby Isaac           }
554e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555e48d15efSToby Isaac           x[i2] = xw[0];
556e48d15efSToby Isaac           idiag -= 1;
557e48d15efSToby Isaac           i2 -= 1;
558e48d15efSToby Isaac         }
559e48d15efSToby Isaac         break;
560e48d15efSToby Isaac       case 2:
5619371c9d4SSatish Balay         s[0] = xb[i2];
5629371c9d4SSatish Balay         s[1] = xb[i2 + 1];
563e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5649371c9d4SSatish Balay         x[i2]     = xw[0];
5659371c9d4SSatish Balay         x[i2 + 1] = xw[1];
566e48d15efSToby Isaac         i2 -= 2;
567e48d15efSToby Isaac         idiag -= 4;
568e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
569e48d15efSToby Isaac           v    = aa + 4 * (diag[i] + 1);
570e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
571e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
5729371c9d4SSatish Balay           s[0] = xb[i2];
5739371c9d4SSatish Balay           s[1] = xb[i2 + 1];
574e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
575e48d15efSToby Isaac             idx   = 2 * vi[j];
576e48d15efSToby Isaac             it    = 4 * j;
5779371c9d4SSatish Balay             xw[0] = x[idx];
5789371c9d4SSatish Balay             xw[1] = x[1 + idx];
579e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
580e48d15efSToby Isaac           }
581e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5829371c9d4SSatish Balay           x[i2]     = xw[0];
5839371c9d4SSatish Balay           x[i2 + 1] = xw[1];
584e48d15efSToby Isaac           idiag -= 4;
585e48d15efSToby Isaac           i2 -= 2;
586e48d15efSToby Isaac         }
587e48d15efSToby Isaac         break;
588e48d15efSToby Isaac       case 3:
5899371c9d4SSatish Balay         s[0] = xb[i2];
5909371c9d4SSatish Balay         s[1] = xb[i2 + 1];
5919371c9d4SSatish Balay         s[2] = xb[i2 + 2];
592e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
5939371c9d4SSatish Balay         x[i2]     = xw[0];
5949371c9d4SSatish Balay         x[i2 + 1] = xw[1];
5959371c9d4SSatish Balay         x[i2 + 2] = xw[2];
596e48d15efSToby Isaac         i2 -= 3;
597e48d15efSToby Isaac         idiag -= 9;
598e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
599e48d15efSToby Isaac           v    = aa + 9 * (diag[i] + 1);
600e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
601e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6029371c9d4SSatish Balay           s[0] = xb[i2];
6039371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6049371c9d4SSatish Balay           s[2] = xb[i2 + 2];
605e48d15efSToby Isaac           while (nz--) {
606e48d15efSToby Isaac             idx   = 3 * (*vi++);
6079371c9d4SSatish Balay             xw[0] = x[idx];
6089371c9d4SSatish Balay             xw[1] = x[1 + idx];
6099371c9d4SSatish Balay             xw[2] = x[2 + idx];
610e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
611e48d15efSToby Isaac             v += 9;
612e48d15efSToby Isaac           }
613e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6149371c9d4SSatish Balay           x[i2]     = xw[0];
6159371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6169371c9d4SSatish Balay           x[i2 + 2] = xw[2];
617e48d15efSToby Isaac           idiag -= 9;
618e48d15efSToby Isaac           i2 -= 3;
619e48d15efSToby Isaac         }
620e48d15efSToby Isaac         break;
621e48d15efSToby Isaac       case 4:
6229371c9d4SSatish Balay         s[0] = xb[i2];
6239371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6249371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6259371c9d4SSatish Balay         s[3] = xb[i2 + 3];
626e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6279371c9d4SSatish Balay         x[i2]     = xw[0];
6289371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6299371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6309371c9d4SSatish Balay         x[i2 + 3] = xw[3];
631e48d15efSToby Isaac         i2 -= 4;
632e48d15efSToby Isaac         idiag -= 16;
633e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
634e48d15efSToby Isaac           v    = aa + 16 * (diag[i] + 1);
635e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
636e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6379371c9d4SSatish Balay           s[0] = xb[i2];
6389371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6399371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6409371c9d4SSatish Balay           s[3] = xb[i2 + 3];
641e48d15efSToby Isaac           while (nz--) {
642e48d15efSToby Isaac             idx   = 4 * (*vi++);
6439371c9d4SSatish Balay             xw[0] = x[idx];
6449371c9d4SSatish Balay             xw[1] = x[1 + idx];
6459371c9d4SSatish Balay             xw[2] = x[2 + idx];
6469371c9d4SSatish Balay             xw[3] = x[3 + idx];
647e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
648e48d15efSToby Isaac             v += 16;
649e48d15efSToby Isaac           }
650e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6519371c9d4SSatish Balay           x[i2]     = xw[0];
6529371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6539371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6549371c9d4SSatish Balay           x[i2 + 3] = xw[3];
655e48d15efSToby Isaac           idiag -= 16;
656e48d15efSToby Isaac           i2 -= 4;
657e48d15efSToby Isaac         }
658e48d15efSToby Isaac         break;
659e48d15efSToby Isaac       case 5:
6609371c9d4SSatish Balay         s[0] = xb[i2];
6619371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6629371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6639371c9d4SSatish Balay         s[3] = xb[i2 + 3];
6649371c9d4SSatish Balay         s[4] = xb[i2 + 4];
665e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6669371c9d4SSatish Balay         x[i2]     = xw[0];
6679371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6689371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6699371c9d4SSatish Balay         x[i2 + 3] = xw[3];
6709371c9d4SSatish Balay         x[i2 + 4] = xw[4];
671e48d15efSToby Isaac         i2 -= 5;
672e48d15efSToby Isaac         idiag -= 25;
673e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
674e48d15efSToby Isaac           v    = aa + 25 * (diag[i] + 1);
675e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
676e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6779371c9d4SSatish Balay           s[0] = xb[i2];
6789371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6799371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6809371c9d4SSatish Balay           s[3] = xb[i2 + 3];
6819371c9d4SSatish Balay           s[4] = xb[i2 + 4];
682e48d15efSToby Isaac           while (nz--) {
683e48d15efSToby Isaac             idx   = 5 * (*vi++);
6849371c9d4SSatish Balay             xw[0] = x[idx];
6859371c9d4SSatish Balay             xw[1] = x[1 + idx];
6869371c9d4SSatish Balay             xw[2] = x[2 + idx];
6879371c9d4SSatish Balay             xw[3] = x[3 + idx];
6889371c9d4SSatish Balay             xw[4] = x[4 + idx];
689e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
690e48d15efSToby Isaac             v += 25;
691e48d15efSToby Isaac           }
692e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6939371c9d4SSatish Balay           x[i2]     = xw[0];
6949371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6959371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6969371c9d4SSatish Balay           x[i2 + 3] = xw[3];
6979371c9d4SSatish Balay           x[i2 + 4] = xw[4];
698e48d15efSToby Isaac           idiag -= 25;
699e48d15efSToby Isaac           i2 -= 5;
700e48d15efSToby Isaac         }
701e48d15efSToby Isaac         break;
702e48d15efSToby Isaac       case 6:
7039371c9d4SSatish Balay         s[0] = xb[i2];
7049371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7059371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7069371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7079371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7089371c9d4SSatish Balay         s[5] = xb[i2 + 5];
709e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7109371c9d4SSatish Balay         x[i2]     = xw[0];
7119371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7129371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7139371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7149371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7159371c9d4SSatish Balay         x[i2 + 5] = xw[5];
716e48d15efSToby Isaac         i2 -= 6;
717e48d15efSToby Isaac         idiag -= 36;
718e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
719e48d15efSToby Isaac           v    = aa + 36 * (diag[i] + 1);
720e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
721e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7229371c9d4SSatish Balay           s[0] = xb[i2];
7239371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7249371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7259371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7269371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7279371c9d4SSatish Balay           s[5] = xb[i2 + 5];
728e48d15efSToby Isaac           while (nz--) {
729e48d15efSToby Isaac             idx   = 6 * (*vi++);
7309371c9d4SSatish Balay             xw[0] = x[idx];
7319371c9d4SSatish Balay             xw[1] = x[1 + idx];
7329371c9d4SSatish Balay             xw[2] = x[2 + idx];
7339371c9d4SSatish Balay             xw[3] = x[3 + idx];
7349371c9d4SSatish Balay             xw[4] = x[4 + idx];
7359371c9d4SSatish Balay             xw[5] = x[5 + idx];
736e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
737e48d15efSToby Isaac             v += 36;
738e48d15efSToby Isaac           }
739e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7409371c9d4SSatish Balay           x[i2]     = xw[0];
7419371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7429371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7439371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7449371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7459371c9d4SSatish Balay           x[i2 + 5] = xw[5];
746e48d15efSToby Isaac           idiag -= 36;
747e48d15efSToby Isaac           i2 -= 6;
748e48d15efSToby Isaac         }
749e48d15efSToby Isaac         break;
750e48d15efSToby Isaac       case 7:
7519371c9d4SSatish Balay         s[0] = xb[i2];
7529371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7539371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7549371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7559371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7569371c9d4SSatish Balay         s[5] = xb[i2 + 5];
7579371c9d4SSatish Balay         s[6] = xb[i2 + 6];
758e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
7599371c9d4SSatish Balay         x[i2]     = xw[0];
7609371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7619371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7629371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7639371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7649371c9d4SSatish Balay         x[i2 + 5] = xw[5];
7659371c9d4SSatish Balay         x[i2 + 6] = xw[6];
766e48d15efSToby Isaac         i2 -= 7;
767e48d15efSToby Isaac         idiag -= 49;
768e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
769e48d15efSToby Isaac           v    = aa + 49 * (diag[i] + 1);
770e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
771e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7729371c9d4SSatish Balay           s[0] = xb[i2];
7739371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7749371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7759371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7769371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7779371c9d4SSatish Balay           s[5] = xb[i2 + 5];
7789371c9d4SSatish Balay           s[6] = xb[i2 + 6];
779e48d15efSToby Isaac           while (nz--) {
780e48d15efSToby Isaac             idx   = 7 * (*vi++);
7819371c9d4SSatish Balay             xw[0] = x[idx];
7829371c9d4SSatish Balay             xw[1] = x[1 + idx];
7839371c9d4SSatish Balay             xw[2] = x[2 + idx];
7849371c9d4SSatish Balay             xw[3] = x[3 + idx];
7859371c9d4SSatish Balay             xw[4] = x[4 + idx];
7869371c9d4SSatish Balay             xw[5] = x[5 + idx];
7879371c9d4SSatish Balay             xw[6] = x[6 + idx];
788e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
789e48d15efSToby Isaac             v += 49;
790e48d15efSToby Isaac           }
791e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
7929371c9d4SSatish Balay           x[i2]     = xw[0];
7939371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7949371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7959371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7969371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7979371c9d4SSatish Balay           x[i2 + 5] = xw[5];
7989371c9d4SSatish Balay           x[i2 + 6] = xw[6];
799e48d15efSToby Isaac           idiag -= 49;
800e48d15efSToby Isaac           i2 -= 7;
801e48d15efSToby Isaac         }
802e48d15efSToby Isaac         break;
803e48d15efSToby Isaac       default:
8049566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(w, xb + i2, bs));
80596b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
806de80f912SBarry Smith         i2 -= bs;
807e48d15efSToby Isaac         idiag -= bs2;
808de80f912SBarry Smith         for (i = m - 2; i >= 0; i--) {
809de80f912SBarry Smith           v  = aa + bs2 * (diag[i] + 1);
810de80f912SBarry Smith           vi = aj + diag[i] + 1;
811de80f912SBarry Smith           nz = ai[i + 1] - diag[i] - 1;
812de80f912SBarry Smith 
8139566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, xb + i2, bs));
814de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
815de80f912SBarry Smith           workt = work;
816de80f912SBarry Smith           for (j = 0; j < nz; j++) {
8179566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
818de80f912SBarry Smith             workt += bs;
819de80f912SBarry Smith           }
82096b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
82196b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
822e48d15efSToby Isaac 
823de80f912SBarry Smith           idiag -= bs2;
824de80f912SBarry Smith           i2 -= bs;
825de80f912SBarry Smith         }
826e48d15efSToby Isaac         break;
827e48d15efSToby Isaac       }
8289566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
829de80f912SBarry Smith     }
830e48d15efSToby Isaac     its--;
831e48d15efSToby Isaac   }
832e48d15efSToby Isaac   while (its--) {
833e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
834e48d15efSToby Isaac       idiag = a->idiag;
835e48d15efSToby Isaac       i2    = 0;
836e48d15efSToby Isaac       switch (bs) {
837e48d15efSToby Isaac       case 1:
838e48d15efSToby Isaac         for (i = 0; i < m; i++) {
839e48d15efSToby Isaac           v    = aa + ai[i];
840e48d15efSToby Isaac           vi   = aj + ai[i];
841e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
842e48d15efSToby Isaac           s[0] = b[i2];
843e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
844e48d15efSToby Isaac             xw[0] = x[vi[j]];
845e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
846e48d15efSToby Isaac           }
847e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
848e48d15efSToby Isaac           x[i2] += xw[0];
849e48d15efSToby Isaac           idiag += 1;
850e48d15efSToby Isaac           i2 += 1;
851e48d15efSToby Isaac         }
852e48d15efSToby Isaac         break;
853e48d15efSToby Isaac       case 2:
854e48d15efSToby Isaac         for (i = 0; i < m; i++) {
855e48d15efSToby Isaac           v    = aa + 4 * ai[i];
856e48d15efSToby Isaac           vi   = aj + ai[i];
857e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8589371c9d4SSatish Balay           s[0] = b[i2];
8599371c9d4SSatish Balay           s[1] = b[i2 + 1];
860e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
861e48d15efSToby Isaac             idx   = 2 * vi[j];
862e48d15efSToby Isaac             it    = 4 * j;
8639371c9d4SSatish Balay             xw[0] = x[idx];
8649371c9d4SSatish Balay             xw[1] = x[1 + idx];
865e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
866e48d15efSToby Isaac           }
867e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
8689371c9d4SSatish Balay           x[i2] += xw[0];
8699371c9d4SSatish Balay           x[i2 + 1] += xw[1];
870e48d15efSToby Isaac           idiag += 4;
871e48d15efSToby Isaac           i2 += 2;
872e48d15efSToby Isaac         }
873e48d15efSToby Isaac         break;
874e48d15efSToby Isaac       case 3:
875e48d15efSToby Isaac         for (i = 0; i < m; i++) {
876e48d15efSToby Isaac           v    = aa + 9 * ai[i];
877e48d15efSToby Isaac           vi   = aj + ai[i];
878e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8799371c9d4SSatish Balay           s[0] = b[i2];
8809371c9d4SSatish Balay           s[1] = b[i2 + 1];
8819371c9d4SSatish Balay           s[2] = b[i2 + 2];
882e48d15efSToby Isaac           while (nz--) {
883e48d15efSToby Isaac             idx   = 3 * (*vi++);
8849371c9d4SSatish Balay             xw[0] = x[idx];
8859371c9d4SSatish Balay             xw[1] = x[1 + idx];
8869371c9d4SSatish Balay             xw[2] = x[2 + idx];
887e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
888e48d15efSToby Isaac             v += 9;
889e48d15efSToby Isaac           }
890e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
8919371c9d4SSatish Balay           x[i2] += xw[0];
8929371c9d4SSatish Balay           x[i2 + 1] += xw[1];
8939371c9d4SSatish Balay           x[i2 + 2] += xw[2];
894e48d15efSToby Isaac           idiag += 9;
895e48d15efSToby Isaac           i2 += 3;
896e48d15efSToby Isaac         }
897e48d15efSToby Isaac         break;
898e48d15efSToby Isaac       case 4:
899e48d15efSToby Isaac         for (i = 0; i < m; i++) {
900e48d15efSToby Isaac           v    = aa + 16 * ai[i];
901e48d15efSToby Isaac           vi   = aj + ai[i];
902e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9039371c9d4SSatish Balay           s[0] = b[i2];
9049371c9d4SSatish Balay           s[1] = b[i2 + 1];
9059371c9d4SSatish Balay           s[2] = b[i2 + 2];
9069371c9d4SSatish Balay           s[3] = b[i2 + 3];
907e48d15efSToby Isaac           while (nz--) {
908e48d15efSToby Isaac             idx   = 4 * (*vi++);
9099371c9d4SSatish Balay             xw[0] = x[idx];
9109371c9d4SSatish Balay             xw[1] = x[1 + idx];
9119371c9d4SSatish Balay             xw[2] = x[2 + idx];
9129371c9d4SSatish Balay             xw[3] = x[3 + idx];
913e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
914e48d15efSToby Isaac             v += 16;
915e48d15efSToby Isaac           }
916e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
9179371c9d4SSatish Balay           x[i2] += xw[0];
9189371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9199371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9209371c9d4SSatish Balay           x[i2 + 3] += xw[3];
921e48d15efSToby Isaac           idiag += 16;
922e48d15efSToby Isaac           i2 += 4;
923e48d15efSToby Isaac         }
924e48d15efSToby Isaac         break;
925e48d15efSToby Isaac       case 5:
926e48d15efSToby Isaac         for (i = 0; i < m; i++) {
927e48d15efSToby Isaac           v    = aa + 25 * ai[i];
928e48d15efSToby Isaac           vi   = aj + ai[i];
929e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9309371c9d4SSatish Balay           s[0] = b[i2];
9319371c9d4SSatish Balay           s[1] = b[i2 + 1];
9329371c9d4SSatish Balay           s[2] = b[i2 + 2];
9339371c9d4SSatish Balay           s[3] = b[i2 + 3];
9349371c9d4SSatish Balay           s[4] = b[i2 + 4];
935e48d15efSToby Isaac           while (nz--) {
936e48d15efSToby Isaac             idx   = 5 * (*vi++);
9379371c9d4SSatish Balay             xw[0] = x[idx];
9389371c9d4SSatish Balay             xw[1] = x[1 + idx];
9399371c9d4SSatish Balay             xw[2] = x[2 + idx];
9409371c9d4SSatish Balay             xw[3] = x[3 + idx];
9419371c9d4SSatish Balay             xw[4] = x[4 + idx];
942e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
943e48d15efSToby Isaac             v += 25;
944e48d15efSToby Isaac           }
945e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
9469371c9d4SSatish Balay           x[i2] += xw[0];
9479371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9489371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9499371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9509371c9d4SSatish Balay           x[i2 + 4] += xw[4];
951e48d15efSToby Isaac           idiag += 25;
952e48d15efSToby Isaac           i2 += 5;
953e48d15efSToby Isaac         }
954e48d15efSToby Isaac         break;
955e48d15efSToby Isaac       case 6:
956e48d15efSToby Isaac         for (i = 0; i < m; i++) {
957e48d15efSToby Isaac           v    = aa + 36 * ai[i];
958e48d15efSToby Isaac           vi   = aj + ai[i];
959e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9609371c9d4SSatish Balay           s[0] = b[i2];
9619371c9d4SSatish Balay           s[1] = b[i2 + 1];
9629371c9d4SSatish Balay           s[2] = b[i2 + 2];
9639371c9d4SSatish Balay           s[3] = b[i2 + 3];
9649371c9d4SSatish Balay           s[4] = b[i2 + 4];
9659371c9d4SSatish Balay           s[5] = b[i2 + 5];
966e48d15efSToby Isaac           while (nz--) {
967e48d15efSToby Isaac             idx   = 6 * (*vi++);
9689371c9d4SSatish Balay             xw[0] = x[idx];
9699371c9d4SSatish Balay             xw[1] = x[1 + idx];
9709371c9d4SSatish Balay             xw[2] = x[2 + idx];
9719371c9d4SSatish Balay             xw[3] = x[3 + idx];
9729371c9d4SSatish Balay             xw[4] = x[4 + idx];
9739371c9d4SSatish Balay             xw[5] = x[5 + idx];
974e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
975e48d15efSToby Isaac             v += 36;
976e48d15efSToby Isaac           }
977e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
9789371c9d4SSatish Balay           x[i2] += xw[0];
9799371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9809371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9819371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9829371c9d4SSatish Balay           x[i2 + 4] += xw[4];
9839371c9d4SSatish Balay           x[i2 + 5] += xw[5];
984e48d15efSToby Isaac           idiag += 36;
985e48d15efSToby Isaac           i2 += 6;
986e48d15efSToby Isaac         }
987e48d15efSToby Isaac         break;
988e48d15efSToby Isaac       case 7:
989e48d15efSToby Isaac         for (i = 0; i < m; i++) {
990e48d15efSToby Isaac           v    = aa + 49 * ai[i];
991e48d15efSToby Isaac           vi   = aj + ai[i];
992e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9939371c9d4SSatish Balay           s[0] = b[i2];
9949371c9d4SSatish Balay           s[1] = b[i2 + 1];
9959371c9d4SSatish Balay           s[2] = b[i2 + 2];
9969371c9d4SSatish Balay           s[3] = b[i2 + 3];
9979371c9d4SSatish Balay           s[4] = b[i2 + 4];
9989371c9d4SSatish Balay           s[5] = b[i2 + 5];
9999371c9d4SSatish Balay           s[6] = b[i2 + 6];
1000e48d15efSToby Isaac           while (nz--) {
1001e48d15efSToby Isaac             idx   = 7 * (*vi++);
10029371c9d4SSatish Balay             xw[0] = x[idx];
10039371c9d4SSatish Balay             xw[1] = x[1 + idx];
10049371c9d4SSatish Balay             xw[2] = x[2 + idx];
10059371c9d4SSatish Balay             xw[3] = x[3 + idx];
10069371c9d4SSatish Balay             xw[4] = x[4 + idx];
10079371c9d4SSatish Balay             xw[5] = x[5 + idx];
10089371c9d4SSatish Balay             xw[6] = x[6 + idx];
1009e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1010e48d15efSToby Isaac             v += 49;
1011e48d15efSToby Isaac           }
1012e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
10139371c9d4SSatish Balay           x[i2] += xw[0];
10149371c9d4SSatish Balay           x[i2 + 1] += xw[1];
10159371c9d4SSatish Balay           x[i2 + 2] += xw[2];
10169371c9d4SSatish Balay           x[i2 + 3] += xw[3];
10179371c9d4SSatish Balay           x[i2 + 4] += xw[4];
10189371c9d4SSatish Balay           x[i2 + 5] += xw[5];
10199371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1020e48d15efSToby Isaac           idiag += 49;
1021e48d15efSToby Isaac           i2 += 7;
1022e48d15efSToby Isaac         }
1023e48d15efSToby Isaac         break;
1024e48d15efSToby Isaac       default:
1025e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1026e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1027e48d15efSToby Isaac           vi = aj + ai[i];
1028e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1029e48d15efSToby Isaac 
10309566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1031e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1032e48d15efSToby Isaac           workt = work;
1033e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
10349566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1035e48d15efSToby Isaac             workt += bs;
1036e48d15efSToby Isaac           }
1037e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1038e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1039e48d15efSToby Isaac 
1040e48d15efSToby Isaac           idiag += bs2;
1041e48d15efSToby Isaac           i2 += bs;
1042e48d15efSToby Isaac         }
1043e48d15efSToby Isaac         break;
1044e48d15efSToby Isaac       }
10459566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1046e48d15efSToby Isaac     }
1047e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1048e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
1049e48d15efSToby Isaac       i2    = bs * (m - 1);
1050e48d15efSToby Isaac       switch (bs) {
1051e48d15efSToby Isaac       case 1:
1052e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1053e48d15efSToby Isaac           v    = aa + ai[i];
1054e48d15efSToby Isaac           vi   = aj + ai[i];
1055e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
1056e48d15efSToby Isaac           s[0] = b[i2];
1057e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1058e48d15efSToby Isaac             xw[0] = x[vi[j]];
1059e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1060e48d15efSToby Isaac           }
1061e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1062e48d15efSToby Isaac           x[i2] += xw[0];
1063e48d15efSToby Isaac           idiag -= 1;
1064e48d15efSToby Isaac           i2 -= 1;
1065e48d15efSToby Isaac         }
1066e48d15efSToby Isaac         break;
1067e48d15efSToby Isaac       case 2:
1068e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1069e48d15efSToby Isaac           v    = aa + 4 * ai[i];
1070e48d15efSToby Isaac           vi   = aj + ai[i];
1071e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10729371c9d4SSatish Balay           s[0] = b[i2];
10739371c9d4SSatish Balay           s[1] = b[i2 + 1];
1074e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1075e48d15efSToby Isaac             idx   = 2 * vi[j];
1076e48d15efSToby Isaac             it    = 4 * j;
10779371c9d4SSatish Balay             xw[0] = x[idx];
10789371c9d4SSatish Balay             xw[1] = x[1 + idx];
1079e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1080e48d15efSToby Isaac           }
1081e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
10829371c9d4SSatish Balay           x[i2] += xw[0];
10839371c9d4SSatish Balay           x[i2 + 1] += xw[1];
1084e48d15efSToby Isaac           idiag -= 4;
1085e48d15efSToby Isaac           i2 -= 2;
1086e48d15efSToby Isaac         }
1087e48d15efSToby Isaac         break;
1088e48d15efSToby Isaac       case 3:
1089e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1090e48d15efSToby Isaac           v    = aa + 9 * ai[i];
1091e48d15efSToby Isaac           vi   = aj + ai[i];
1092e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10939371c9d4SSatish Balay           s[0] = b[i2];
10949371c9d4SSatish Balay           s[1] = b[i2 + 1];
10959371c9d4SSatish Balay           s[2] = b[i2 + 2];
1096e48d15efSToby Isaac           while (nz--) {
1097e48d15efSToby Isaac             idx   = 3 * (*vi++);
10989371c9d4SSatish Balay             xw[0] = x[idx];
10999371c9d4SSatish Balay             xw[1] = x[1 + idx];
11009371c9d4SSatish Balay             xw[2] = x[2 + idx];
1101e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1102e48d15efSToby Isaac             v += 9;
1103e48d15efSToby Isaac           }
1104e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
11059371c9d4SSatish Balay           x[i2] += xw[0];
11069371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11079371c9d4SSatish Balay           x[i2 + 2] += xw[2];
1108e48d15efSToby Isaac           idiag -= 9;
1109e48d15efSToby Isaac           i2 -= 3;
1110e48d15efSToby Isaac         }
1111e48d15efSToby Isaac         break;
1112e48d15efSToby Isaac       case 4:
1113e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1114e48d15efSToby Isaac           v    = aa + 16 * ai[i];
1115e48d15efSToby Isaac           vi   = aj + ai[i];
1116e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11179371c9d4SSatish Balay           s[0] = b[i2];
11189371c9d4SSatish Balay           s[1] = b[i2 + 1];
11199371c9d4SSatish Balay           s[2] = b[i2 + 2];
11209371c9d4SSatish Balay           s[3] = b[i2 + 3];
1121e48d15efSToby Isaac           while (nz--) {
1122e48d15efSToby Isaac             idx   = 4 * (*vi++);
11239371c9d4SSatish Balay             xw[0] = x[idx];
11249371c9d4SSatish Balay             xw[1] = x[1 + idx];
11259371c9d4SSatish Balay             xw[2] = x[2 + idx];
11269371c9d4SSatish Balay             xw[3] = x[3 + idx];
1127e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1128e48d15efSToby Isaac             v += 16;
1129e48d15efSToby Isaac           }
1130e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
11319371c9d4SSatish Balay           x[i2] += xw[0];
11329371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11339371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11349371c9d4SSatish Balay           x[i2 + 3] += xw[3];
1135e48d15efSToby Isaac           idiag -= 16;
1136e48d15efSToby Isaac           i2 -= 4;
1137e48d15efSToby Isaac         }
1138e48d15efSToby Isaac         break;
1139e48d15efSToby Isaac       case 5:
1140e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1141e48d15efSToby Isaac           v    = aa + 25 * ai[i];
1142e48d15efSToby Isaac           vi   = aj + ai[i];
1143e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11449371c9d4SSatish Balay           s[0] = b[i2];
11459371c9d4SSatish Balay           s[1] = b[i2 + 1];
11469371c9d4SSatish Balay           s[2] = b[i2 + 2];
11479371c9d4SSatish Balay           s[3] = b[i2 + 3];
11489371c9d4SSatish Balay           s[4] = b[i2 + 4];
1149e48d15efSToby Isaac           while (nz--) {
1150e48d15efSToby Isaac             idx   = 5 * (*vi++);
11519371c9d4SSatish Balay             xw[0] = x[idx];
11529371c9d4SSatish Balay             xw[1] = x[1 + idx];
11539371c9d4SSatish Balay             xw[2] = x[2 + idx];
11549371c9d4SSatish Balay             xw[3] = x[3 + idx];
11559371c9d4SSatish Balay             xw[4] = x[4 + idx];
1156e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1157e48d15efSToby Isaac             v += 25;
1158e48d15efSToby Isaac           }
1159e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
11609371c9d4SSatish Balay           x[i2] += xw[0];
11619371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11629371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11639371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11649371c9d4SSatish Balay           x[i2 + 4] += xw[4];
1165e48d15efSToby Isaac           idiag -= 25;
1166e48d15efSToby Isaac           i2 -= 5;
1167e48d15efSToby Isaac         }
1168e48d15efSToby Isaac         break;
1169e48d15efSToby Isaac       case 6:
1170e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1171e48d15efSToby Isaac           v    = aa + 36 * ai[i];
1172e48d15efSToby Isaac           vi   = aj + ai[i];
1173e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11749371c9d4SSatish Balay           s[0] = b[i2];
11759371c9d4SSatish Balay           s[1] = b[i2 + 1];
11769371c9d4SSatish Balay           s[2] = b[i2 + 2];
11779371c9d4SSatish Balay           s[3] = b[i2 + 3];
11789371c9d4SSatish Balay           s[4] = b[i2 + 4];
11799371c9d4SSatish Balay           s[5] = b[i2 + 5];
1180e48d15efSToby Isaac           while (nz--) {
1181e48d15efSToby Isaac             idx   = 6 * (*vi++);
11829371c9d4SSatish Balay             xw[0] = x[idx];
11839371c9d4SSatish Balay             xw[1] = x[1 + idx];
11849371c9d4SSatish Balay             xw[2] = x[2 + idx];
11859371c9d4SSatish Balay             xw[3] = x[3 + idx];
11869371c9d4SSatish Balay             xw[4] = x[4 + idx];
11879371c9d4SSatish Balay             xw[5] = x[5 + idx];
1188e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1189e48d15efSToby Isaac             v += 36;
1190e48d15efSToby Isaac           }
1191e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
11929371c9d4SSatish Balay           x[i2] += xw[0];
11939371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11949371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11959371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11969371c9d4SSatish Balay           x[i2 + 4] += xw[4];
11979371c9d4SSatish Balay           x[i2 + 5] += xw[5];
1198e48d15efSToby Isaac           idiag -= 36;
1199e48d15efSToby Isaac           i2 -= 6;
1200e48d15efSToby Isaac         }
1201e48d15efSToby Isaac         break;
1202e48d15efSToby Isaac       case 7:
1203e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1204e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1205e48d15efSToby Isaac           vi   = aj + ai[i];
1206e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
12079371c9d4SSatish Balay           s[0] = b[i2];
12089371c9d4SSatish Balay           s[1] = b[i2 + 1];
12099371c9d4SSatish Balay           s[2] = b[i2 + 2];
12109371c9d4SSatish Balay           s[3] = b[i2 + 3];
12119371c9d4SSatish Balay           s[4] = b[i2 + 4];
12129371c9d4SSatish Balay           s[5] = b[i2 + 5];
12139371c9d4SSatish Balay           s[6] = b[i2 + 6];
1214e48d15efSToby Isaac           while (nz--) {
1215e48d15efSToby Isaac             idx   = 7 * (*vi++);
12169371c9d4SSatish Balay             xw[0] = x[idx];
12179371c9d4SSatish Balay             xw[1] = x[1 + idx];
12189371c9d4SSatish Balay             xw[2] = x[2 + idx];
12199371c9d4SSatish Balay             xw[3] = x[3 + idx];
12209371c9d4SSatish Balay             xw[4] = x[4 + idx];
12219371c9d4SSatish Balay             xw[5] = x[5 + idx];
12229371c9d4SSatish Balay             xw[6] = x[6 + idx];
1223e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1224e48d15efSToby Isaac             v += 49;
1225e48d15efSToby Isaac           }
1226e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
12279371c9d4SSatish Balay           x[i2] += xw[0];
12289371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12299371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12309371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12319371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12329371c9d4SSatish Balay           x[i2 + 5] += xw[5];
12339371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1234e48d15efSToby Isaac           idiag -= 49;
1235e48d15efSToby Isaac           i2 -= 7;
1236e48d15efSToby Isaac         }
1237e48d15efSToby Isaac         break;
1238e48d15efSToby Isaac       default:
1239e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1240e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1241e48d15efSToby Isaac           vi = aj + ai[i];
1242e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1243e48d15efSToby Isaac 
12449566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1245e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1246e48d15efSToby Isaac           workt = work;
1247e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
12489566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1249e48d15efSToby Isaac             workt += bs;
1250e48d15efSToby Isaac           }
1251e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1252e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1253e48d15efSToby Isaac 
1254e48d15efSToby Isaac           idiag -= bs2;
1255e48d15efSToby Isaac           i2 -= bs;
1256e48d15efSToby Isaac         }
1257e48d15efSToby Isaac         break;
1258e48d15efSToby Isaac       }
12599566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1260e48d15efSToby Isaac     }
1261e48d15efSToby Isaac   }
12629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
1264de80f912SBarry Smith   PetscFunctionReturn(0);
1265de80f912SBarry Smith }
1266de80f912SBarry Smith 
1267af674e45SBarry Smith /*
126881824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1269af674e45SBarry Smith */
1270af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1271af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1272af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1273af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4
1274af674e45SBarry Smith #endif
1275af674e45SBarry Smith 
12769371c9d4SSatish Balay PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) {
1277af674e45SBarry Smith   Mat                A = *AA;
1278af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1279c1ac3661SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1280c1ac3661SBarry Smith   PetscInt          *ai = a->i, *ailen = a->ilen;
128117ec6a02SBarry Smith   PetscInt          *aj = a->j, stepval, lastcol = -1;
1282f15d580aSBarry Smith   const PetscScalar *value = v;
12834bb09213Spetsc   MatScalar         *ap, *aa = a->a, *bap;
1284af674e45SBarry Smith 
1285af674e45SBarry Smith   PetscFunctionBegin;
1286ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1287af674e45SBarry Smith   stepval = (n - 1) * 4;
1288af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
1289af674e45SBarry Smith     row  = im[k];
1290af674e45SBarry Smith     rp   = aj + ai[row];
1291af674e45SBarry Smith     ap   = aa + 16 * ai[row];
1292af674e45SBarry Smith     nrow = ailen[row];
1293af674e45SBarry Smith     low  = 0;
129417ec6a02SBarry Smith     high = nrow;
1295af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
1296af674e45SBarry Smith       col = in[l];
1297db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1298db4deed7SKarl Rupp       else high = nrow;
129917ec6a02SBarry Smith       lastcol = col;
13001e3347e8SBarry Smith       value   = v + k * (stepval + 4 + l) * 4;
1301af674e45SBarry Smith       while (high - low > 7) {
1302af674e45SBarry Smith         t = (low + high) / 2;
1303af674e45SBarry Smith         if (rp[t] > col) high = t;
1304af674e45SBarry Smith         else low = t;
1305af674e45SBarry Smith       }
1306af674e45SBarry Smith       for (i = low; i < high; i++) {
1307af674e45SBarry Smith         if (rp[i] > col) break;
1308af674e45SBarry Smith         if (rp[i] == col) {
1309af674e45SBarry Smith           bap = ap + 16 * i;
1310af674e45SBarry Smith           for (ii = 0; ii < 4; ii++, value += stepval) {
13119371c9d4SSatish Balay             for (jj = ii; jj < 16; jj += 4) { bap[jj] += *value++; }
1312af674e45SBarry Smith           }
1313af674e45SBarry Smith           goto noinsert2;
1314af674e45SBarry Smith         }
1315af674e45SBarry Smith       }
1316af674e45SBarry Smith       N = nrow++ - 1;
131717ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
1318af674e45SBarry Smith       /* shift up all the later entries in this row */
1319af674e45SBarry Smith       for (ii = N; ii >= i; ii--) {
1320af674e45SBarry Smith         rp[ii + 1] = rp[ii];
13219566063dSJacob Faibussowitsch         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1322af674e45SBarry Smith       }
1323*48a46eb9SPierre Jolivet       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1324af674e45SBarry Smith       rp[i] = col;
1325af674e45SBarry Smith       bap   = ap + 16 * i;
1326af674e45SBarry Smith       for (ii = 0; ii < 4; ii++, value += stepval) {
13279371c9d4SSatish Balay         for (jj = ii; jj < 16; jj += 4) { bap[jj] = *value++; }
1328af674e45SBarry Smith       }
1329af674e45SBarry Smith     noinsert2:;
1330af674e45SBarry Smith       low = i;
1331af674e45SBarry Smith     }
1332af674e45SBarry Smith     ailen[row] = nrow;
1333af674e45SBarry Smith   }
1334be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1335af674e45SBarry Smith }
1336af674e45SBarry Smith 
1337af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1338af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4
1339af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1340af674e45SBarry Smith #define matsetvalues4_ matsetvalues4
1341af674e45SBarry Smith #endif
1342af674e45SBarry Smith 
13439371c9d4SSatish Balay PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) {
1344af674e45SBarry Smith   Mat          A = *AA;
1345af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1346580bdb30SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1347c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
1348c1ac3661SBarry Smith   PetscInt    *aj = a->j, brow, bcol;
134917ec6a02SBarry Smith   PetscInt     ridx, cidx, lastcol = -1;
1350af674e45SBarry Smith   MatScalar   *ap, value, *aa      = a->a, *bap;
1351af674e45SBarry Smith 
1352af674e45SBarry Smith   PetscFunctionBegin;
1353af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
13549371c9d4SSatish Balay     row  = im[k];
13559371c9d4SSatish Balay     brow = row / 4;
1356af674e45SBarry Smith     rp   = aj + ai[brow];
1357af674e45SBarry Smith     ap   = aa + 16 * ai[brow];
1358af674e45SBarry Smith     nrow = ailen[brow];
1359af674e45SBarry Smith     low  = 0;
136017ec6a02SBarry Smith     high = nrow;
1361af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
13629371c9d4SSatish Balay       col   = in[l];
13639371c9d4SSatish Balay       bcol  = col / 4;
13649371c9d4SSatish Balay       ridx  = row % 4;
13659371c9d4SSatish Balay       cidx  = col % 4;
1366af674e45SBarry Smith       value = v[l + k * n];
1367db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1368db4deed7SKarl Rupp       else high = nrow;
136917ec6a02SBarry Smith       lastcol = col;
1370af674e45SBarry Smith       while (high - low > 7) {
1371af674e45SBarry Smith         t = (low + high) / 2;
1372af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1373af674e45SBarry Smith         else low = t;
1374af674e45SBarry Smith       }
1375af674e45SBarry Smith       for (i = low; i < high; i++) {
1376af674e45SBarry Smith         if (rp[i] > bcol) break;
1377af674e45SBarry Smith         if (rp[i] == bcol) {
1378af674e45SBarry Smith           bap = ap + 16 * i + 4 * cidx + ridx;
1379af674e45SBarry Smith           *bap += value;
1380af674e45SBarry Smith           goto noinsert1;
1381af674e45SBarry Smith         }
1382af674e45SBarry Smith       }
1383af674e45SBarry Smith       N = nrow++ - 1;
138417ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1385af674e45SBarry Smith       /* shift up all the later entries in this row */
13869566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
13879566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
13889566063dSJacob Faibussowitsch       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1389af674e45SBarry Smith       rp[i]                        = bcol;
1390af674e45SBarry Smith       ap[16 * i + 4 * cidx + ridx] = value;
1391af674e45SBarry Smith     noinsert1:;
1392af674e45SBarry Smith       low = i;
1393af674e45SBarry Smith     }
1394af674e45SBarry Smith     ailen[brow] = nrow;
1395af674e45SBarry Smith   }
1396be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1397af674e45SBarry Smith }
1398af674e45SBarry Smith 
1399be5855fcSBarry Smith /*
1400be5855fcSBarry Smith      Checks for missing diagonals
1401be5855fcSBarry Smith */
14029371c9d4SSatish Balay PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) {
1403be5855fcSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14047734d3b5SMatthew G. Knepley   PetscInt    *diag, *ii = a->i, i;
1405be5855fcSBarry Smith 
1406be5855fcSBarry Smith   PetscFunctionBegin;
14079566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
14082af78befSBarry Smith   *missing = PETSC_FALSE;
14097734d3b5SMatthew G. Knepley   if (A->rmap->n > 0 && !ii) {
14102efa7f71SHong Zhang     *missing = PETSC_TRUE;
14112efa7f71SHong Zhang     if (d) *d = 0;
14129566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
14132efa7f71SHong Zhang   } else {
141401445905SHong Zhang     PetscInt n;
141501445905SHong Zhang     n    = PetscMin(a->mbs, a->nbs);
1416883fce79SBarry Smith     diag = a->diag;
141701445905SHong Zhang     for (i = 0; i < n; i++) {
14187734d3b5SMatthew G. Knepley       if (diag[i] >= ii[i + 1]) {
14192af78befSBarry Smith         *missing = PETSC_TRUE;
14202af78befSBarry Smith         if (d) *d = i;
14219566063dSJacob Faibussowitsch         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1422358d2f5dSShri Abhyankar         break;
14232efa7f71SHong Zhang       }
1424be5855fcSBarry Smith     }
1425be5855fcSBarry Smith   }
1426be5855fcSBarry Smith   PetscFunctionReturn(0);
1427be5855fcSBarry Smith }
1428be5855fcSBarry Smith 
14299371c9d4SSatish Balay PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) {
1430de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
143109f38230SBarry Smith   PetscInt     i, j, m = a->mbs;
1432de6a44a3SBarry Smith 
14333a40ed3dSBarry Smith   PetscFunctionBegin;
143409f38230SBarry Smith   if (!a->diag) {
14359566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(m, &a->diag));
14369566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)A, m * sizeof(PetscInt)));
14374fd072dbSBarry Smith     a->free_diag = PETSC_TRUE;
143809f38230SBarry Smith   }
14397fc0212eSBarry Smith   for (i = 0; i < m; i++) {
144009f38230SBarry Smith     a->diag[i] = a->i[i + 1];
1441de6a44a3SBarry Smith     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1442de6a44a3SBarry Smith       if (a->j[j] == i) {
144309f38230SBarry Smith         a->diag[i] = j;
1444de6a44a3SBarry Smith         break;
1445de6a44a3SBarry Smith       }
1446de6a44a3SBarry Smith     }
1447de6a44a3SBarry Smith   }
14483a40ed3dSBarry Smith   PetscFunctionReturn(0);
1449de6a44a3SBarry Smith }
14502593348eSBarry Smith 
14519371c9d4SSatish Balay static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) {
14523b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14531a83f524SJed Brown   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
14541a83f524SJed Brown   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
14553b2fbd54SBarry Smith 
14563a40ed3dSBarry Smith   PetscFunctionBegin;
14573b2fbd54SBarry Smith   *nn = n;
14583a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
14593b2fbd54SBarry Smith   if (symmetric) {
14609566063dSJacob Faibussowitsch     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1461553b3c51SBarry Smith     nz = tia[n];
14623b2fbd54SBarry Smith   } else {
14639371c9d4SSatish Balay     tia = a->i;
14649371c9d4SSatish Balay     tja = a->j;
14653b2fbd54SBarry Smith   }
14663b2fbd54SBarry Smith 
1467ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1468ecc77c7aSBarry Smith     (*nn) *= bs;
14698f7157efSSatish Balay     /* malloc & create the natural set of indices */
14709566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((n + 1) * bs, ia));
14719985e31cSBarry Smith     if (n) {
14722462f5fdSStefano Zampini       (*ia)[0] = oshift;
14739371c9d4SSatish Balay       for (j = 1; j < bs; j++) { (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; }
14749985e31cSBarry Smith     }
1475ecc77c7aSBarry Smith 
1476ecc77c7aSBarry Smith     for (i = 1; i < n; i++) {
1477ecc77c7aSBarry Smith       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
14789371c9d4SSatish Balay       for (j = 1; j < bs; j++) { (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; }
14798f7157efSSatish Balay     }
14809371c9d4SSatish Balay     if (n) { (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; }
1481ecc77c7aSBarry Smith 
14821a83f524SJed Brown     if (inja) {
14839566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz * bs * bs, ja));
14849985e31cSBarry Smith       cnt = 0;
14859985e31cSBarry Smith       for (i = 0; i < n; i++) {
14869985e31cSBarry Smith         for (j = 0; j < bs; j++) {
14879985e31cSBarry Smith           for (k = tia[i]; k < tia[i + 1]; k++) {
14889371c9d4SSatish Balay             for (l = 0; l < bs; l++) { (*ja)[cnt++] = bs * tja[k] + l; }
14899985e31cSBarry Smith           }
14909985e31cSBarry Smith         }
14919985e31cSBarry Smith       }
14929985e31cSBarry Smith     }
14939985e31cSBarry Smith 
14948f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
14959566063dSJacob Faibussowitsch       PetscCall(PetscFree(tia));
14969566063dSJacob Faibussowitsch       PetscCall(PetscFree(tja));
14978f7157efSSatish Balay     }
1498f6d58c54SBarry Smith   } else if (oshift == 1) {
1499715a17b5SBarry Smith     if (symmetric) {
1500a2ea699eSBarry Smith       nz = tia[A->rmap->n / bs];
1501715a17b5SBarry Smith       /*  add 1 to i and j indices */
1502715a17b5SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1503715a17b5SBarry Smith       *ia = tia;
1504715a17b5SBarry Smith       if (ja) {
1505715a17b5SBarry Smith         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1506715a17b5SBarry Smith         *ja = tja;
1507715a17b5SBarry Smith       }
1508715a17b5SBarry Smith     } else {
1509a2ea699eSBarry Smith       nz = a->i[A->rmap->n / bs];
1510f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
15119566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1512f6d58c54SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1513f6d58c54SBarry Smith       if (ja) {
15149566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(nz, ja));
1515f6d58c54SBarry Smith         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1516f6d58c54SBarry Smith       }
1517715a17b5SBarry Smith     }
15188f7157efSSatish Balay   } else {
15198f7157efSSatish Balay     *ia = tia;
1520ecc77c7aSBarry Smith     if (ja) *ja = tja;
15218f7157efSSatish Balay   }
15223a40ed3dSBarry Smith   PetscFunctionReturn(0);
15233b2fbd54SBarry Smith }
15243b2fbd54SBarry Smith 
15259371c9d4SSatish Balay static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
15263a40ed3dSBarry Smith   PetscFunctionBegin;
15273a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
1528715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
15299566063dSJacob Faibussowitsch     PetscCall(PetscFree(*ia));
15309566063dSJacob Faibussowitsch     if (ja) PetscCall(PetscFree(*ja));
15313b2fbd54SBarry Smith   }
15323a40ed3dSBarry Smith   PetscFunctionReturn(0);
15333b2fbd54SBarry Smith }
15343b2fbd54SBarry Smith 
15359371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqBAIJ(Mat A) {
15362d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15372d61bbb3SSatish Balay 
1538433994e6SBarry Smith   PetscFunctionBegin;
1539aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1540c0aa6a63SJacob Faibussowitsch   PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz);
15412d61bbb3SSatish Balay #endif
15429566063dSJacob Faibussowitsch   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
15439566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
15449566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
15459566063dSJacob Faibussowitsch   if (a->free_diag) PetscCall(PetscFree(a->diag));
15469566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->idiag));
15479566063dSJacob Faibussowitsch   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
15489566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->solve_work));
15499566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->mult_work));
15509566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_workt));
15519566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_work));
15529566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
15539566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->saved_values));
15549566063dSJacob Faibussowitsch   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1555c4319e64SHong Zhang 
15569566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->sbaijMat));
15579566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->parent));
15589566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1559901853e0SKris Buschelman 
15609566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
15619566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
15629566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
15639566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
15649566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
15659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
15669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
15679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
15689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
15699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
15709566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
15719566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
15727ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
15739566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
15747ea3e4caSstefano_zampini #endif
15759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
15762e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
15772d61bbb3SSatish Balay   PetscFunctionReturn(0);
15782d61bbb3SSatish Balay }
15792d61bbb3SSatish Balay 
15809371c9d4SSatish Balay PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) {
15812d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15822d61bbb3SSatish Balay 
15832d61bbb3SSatish Balay   PetscFunctionBegin;
1584aa275fccSKris Buschelman   switch (op) {
15859371c9d4SSatish Balay   case MAT_ROW_ORIENTED: a->roworiented = flg; break;
15869371c9d4SSatish Balay   case MAT_KEEP_NONZERO_PATTERN: a->keepnonzeropattern = flg; break;
15879371c9d4SSatish Balay   case MAT_NEW_NONZERO_LOCATIONS: a->nonew = (flg ? 0 : 1); break;
15889371c9d4SSatish Balay   case MAT_NEW_NONZERO_LOCATION_ERR: a->nonew = (flg ? -1 : 0); break;
15899371c9d4SSatish Balay   case MAT_NEW_NONZERO_ALLOCATION_ERR: a->nonew = (flg ? -2 : 0); break;
15909371c9d4SSatish Balay   case MAT_UNUSED_NONZERO_LOCATION_ERR: a->nounused = (flg ? -1 : 0); break;
15918c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1592aa275fccSKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
1593aa275fccSKris Buschelman   case MAT_USE_HASH_TABLE:
15949371c9d4SSatish Balay   case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
15955021d80fSJed Brown   case MAT_SPD:
159677e54ba9SKris Buschelman   case MAT_SYMMETRIC:
159777e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
15989a4540c5SBarry Smith   case MAT_HERMITIAN:
15999a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1600b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1601c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1602672ba085SHong Zhang   case MAT_STRUCTURE_ONLY:
1603b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
1604b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
160577e54ba9SKris Buschelman     break;
16069371c9d4SSatish Balay   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
16072d61bbb3SSatish Balay   }
16082d61bbb3SSatish Balay   PetscFunctionReturn(0);
16092d61bbb3SSatish Balay }
16102d61bbb3SSatish Balay 
161152768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
16129371c9d4SSatish Balay PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) {
161352768537SHong Zhang   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
161452768537SHong Zhang   MatScalar   *aa_i;
161587828ca2SBarry Smith   PetscScalar *v_i;
16162d61bbb3SSatish Balay 
16172d61bbb3SSatish Balay   PetscFunctionBegin;
1618d0f46423SBarry Smith   bs  = A->rmap->bs;
161952768537SHong Zhang   bs2 = bs * bs;
16205f80ce2aSJacob Faibussowitsch   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
16212d61bbb3SSatish Balay 
16222d61bbb3SSatish Balay   bn  = row / bs; /* Block number */
16232d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
16242d61bbb3SSatish Balay   M   = ai[bn + 1] - ai[bn];
16252d61bbb3SSatish Balay   *nz = bs * M;
16262d61bbb3SSatish Balay 
16272d61bbb3SSatish Balay   if (v) {
1628f4259b30SLisandro Dalcin     *v = NULL;
16292d61bbb3SSatish Balay     if (*nz) {
16309566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, v));
16312d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16322d61bbb3SSatish Balay         v_i  = *v + i * bs;
16332d61bbb3SSatish Balay         aa_i = aa + bs2 * (ai[bn] + i);
163426fbe8dcSKarl Rupp         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
16352d61bbb3SSatish Balay       }
16362d61bbb3SSatish Balay     }
16372d61bbb3SSatish Balay   }
16382d61bbb3SSatish Balay 
16392d61bbb3SSatish Balay   if (idx) {
1640f4259b30SLisandro Dalcin     *idx = NULL;
16412d61bbb3SSatish Balay     if (*nz) {
16429566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, idx));
16432d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16442d61bbb3SSatish Balay         idx_i = *idx + i * bs;
16452d61bbb3SSatish Balay         itmp  = bs * aj[ai[bn] + i];
164626fbe8dcSKarl Rupp         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
16472d61bbb3SSatish Balay       }
16482d61bbb3SSatish Balay     }
16492d61bbb3SSatish Balay   }
16502d61bbb3SSatish Balay   PetscFunctionReturn(0);
16512d61bbb3SSatish Balay }
16522d61bbb3SSatish Balay 
16539371c9d4SSatish Balay PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
165452768537SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
165552768537SHong Zhang 
165652768537SHong Zhang   PetscFunctionBegin;
16579566063dSJacob Faibussowitsch   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
165852768537SHong Zhang   PetscFunctionReturn(0);
165952768537SHong Zhang }
166052768537SHong Zhang 
16619371c9d4SSatish Balay PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
16622d61bbb3SSatish Balay   PetscFunctionBegin;
1663cb4a9cd9SHong Zhang   if (nz) *nz = 0;
16649566063dSJacob Faibussowitsch   if (idx) PetscCall(PetscFree(*idx));
16659566063dSJacob Faibussowitsch   if (v) PetscCall(PetscFree(*v));
16662d61bbb3SSatish Balay   PetscFunctionReturn(0);
16672d61bbb3SSatish Balay }
16682d61bbb3SSatish Balay 
16699371c9d4SSatish Balay PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) {
167020e84f26SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
16712d61bbb3SSatish Balay   Mat          C;
167220e84f26SHong Zhang   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
167320e84f26SHong Zhang   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
167420e84f26SHong Zhang   MatScalar   *ata, *aa = a->a;
16752d61bbb3SSatish Balay 
16762d61bbb3SSatish Balay   PetscFunctionBegin;
16777fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
16789566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1679cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
168020e84f26SHong Zhang     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
16812d61bbb3SSatish Balay 
16829566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
16839566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
16849566063dSJacob Faibussowitsch     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
16859566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
168620e84f26SHong Zhang 
168720e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
168820e84f26SHong Zhang     ati = at->i;
168920e84f26SHong Zhang     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1690fc4dec0aSBarry Smith   } else {
1691fc4dec0aSBarry Smith     C   = *B;
169220e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
169320e84f26SHong Zhang     ati = at->i;
1694fc4dec0aSBarry Smith   }
1695fc4dec0aSBarry Smith 
169620e84f26SHong Zhang   atj = at->j;
169720e84f26SHong Zhang   ata = at->a;
169820e84f26SHong Zhang 
169920e84f26SHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
17009566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(atfill, ati, nbs));
170120e84f26SHong Zhang 
170220e84f26SHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
17032d61bbb3SSatish Balay   for (i = 0; i < mbs; i++) {
170420e84f26SHong Zhang     anzj = ai[i + 1] - ai[i];
170520e84f26SHong Zhang     for (j = 0; j < anzj; j++) {
170620e84f26SHong Zhang       atj[atfill[*aj]] = i;
170720e84f26SHong Zhang       for (kr = 0; kr < bs; kr++) {
17089371c9d4SSatish Balay         for (k = 0; k < bs; k++) { ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; }
17092d61bbb3SSatish Balay       }
171020e84f26SHong Zhang       atfill[*aj++] += 1;
171120e84f26SHong Zhang     }
171220e84f26SHong Zhang   }
17139566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
17149566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
17152d61bbb3SSatish Balay 
171620e84f26SHong Zhang   /* Clean up temporary space and complete requests. */
17179566063dSJacob Faibussowitsch   PetscCall(PetscFree(atfill));
171820e84f26SHong Zhang 
1719cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
17209566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
17212d61bbb3SSatish Balay     *B = C;
17222d61bbb3SSatish Balay   } else {
17239566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &C));
17242d61bbb3SSatish Balay   }
17252d61bbb3SSatish Balay   PetscFunctionReturn(0);
17262d61bbb3SSatish Balay }
17272d61bbb3SSatish Balay 
17289371c9d4SSatish Balay PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) {
1729453d3561SHong Zhang   Mat Btrans;
1730453d3561SHong Zhang 
1731453d3561SHong Zhang   PetscFunctionBegin;
1732453d3561SHong Zhang   *f = PETSC_FALSE;
1733acd337a6SBarry Smith   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
17349566063dSJacob Faibussowitsch   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
17359566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Btrans));
1736453d3561SHong Zhang   PetscFunctionReturn(0);
1737453d3561SHong Zhang }
1738453d3561SHong Zhang 
1739618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
17409371c9d4SSatish Balay PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) {
1741b51a4376SLisandro Dalcin   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1742b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1743b51a4376SLisandro Dalcin   PetscInt    *rowlens, *colidxs;
1744b51a4376SLisandro Dalcin   PetscScalar *matvals;
17452593348eSBarry Smith 
17463a40ed3dSBarry Smith   PetscFunctionBegin;
17479566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
17483b2fbd54SBarry Smith 
1749b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1750b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1751b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1752b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1753b51a4376SLisandro Dalcin   nz = bs * bs * A->nz;
17542593348eSBarry Smith 
1755b51a4376SLisandro Dalcin   /* write matrix header */
1756b51a4376SLisandro Dalcin   header[0] = MAT_FILE_CLASSID;
17579371c9d4SSatish Balay   header[1] = M;
17589371c9d4SSatish Balay   header[2] = N;
17599371c9d4SSatish Balay   header[3] = nz;
17609566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
17612593348eSBarry Smith 
1762b51a4376SLisandro Dalcin   /* store row lengths */
17639566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rowlens));
1764b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
17659371c9d4SSatish Balay     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
17669566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
17679566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
1768b51a4376SLisandro Dalcin 
1769b51a4376SLisandro Dalcin   /* store column indices  */
17709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &colidxs));
1771b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1772b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1773b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
17749371c9d4SSatish Balay         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
17755f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
17769566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
17779566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
17782593348eSBarry Smith 
17792593348eSBarry Smith   /* store nonzero values */
17809566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &matvals));
1781b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1782b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1783b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
17849371c9d4SSatish Balay         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
17855f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
17869566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
17879566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
1788ce6f0cecSBarry Smith 
1789b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
17909566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
17913a40ed3dSBarry Smith   PetscFunctionReturn(0);
17922593348eSBarry Smith }
17932593348eSBarry Smith 
17949371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) {
17957dc0baabSHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
17967dc0baabSHong Zhang   PetscInt     i, bs = A->rmap->bs, k;
17977dc0baabSHong Zhang 
17987dc0baabSHong Zhang   PetscFunctionBegin;
17999566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
18007dc0baabSHong Zhang   for (i = 0; i < a->mbs; i++) {
18019566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1802*48a46eb9SPierre Jolivet     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
18039566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18047dc0baabSHong Zhang   }
18059566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18067dc0baabSHong Zhang   PetscFunctionReturn(0);
18077dc0baabSHong Zhang }
18087dc0baabSHong Zhang 
18099371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) {
1810b6490206SBarry Smith   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1811d0f46423SBarry Smith   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1812f3ef73ceSBarry Smith   PetscViewerFormat format;
18132593348eSBarry Smith 
18143a40ed3dSBarry Smith   PetscFunctionBegin;
18157dc0baabSHong Zhang   if (A->structure_only) {
18169566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
18177dc0baabSHong Zhang     PetscFunctionReturn(0);
18187dc0baabSHong Zhang   }
18197dc0baabSHong Zhang 
18209566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
1821456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
18229566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1823fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1824ade3a672SBarry Smith     const char *matname;
1825bcd9e38bSBarry Smith     Mat         aij;
18269566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
18279566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
18289566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
18299566063dSJacob Faibussowitsch     PetscCall(MatView(aij, viewer));
18309566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&aij));
183104929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
183204929863SHong Zhang     PetscFunctionReturn(0);
1833fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
18349566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
183544cd7ae7SLois Curfman McInnes     for (i = 0; i < a->mbs; i++) {
183644cd7ae7SLois Curfman McInnes       for (j = 0; j < bs; j++) {
18379566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
183844cd7ae7SLois Curfman McInnes         for (k = a->i[i]; k < a->i[i + 1]; k++) {
183944cd7ae7SLois Curfman McInnes           for (l = 0; l < bs; l++) {
1840aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18410e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18429371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18430e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18449371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18450e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18469566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
18470ef38995SBarry Smith             }
184844cd7ae7SLois Curfman McInnes #else
1849*48a46eb9SPierre Jolivet             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
185044cd7ae7SLois Curfman McInnes #endif
185144cd7ae7SLois Curfman McInnes           }
185244cd7ae7SLois Curfman McInnes         }
18539566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
185444cd7ae7SLois Curfman McInnes       }
185544cd7ae7SLois Curfman McInnes     }
18569566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18570ef38995SBarry Smith   } else {
18589566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1859b6490206SBarry Smith     for (i = 0; i < a->mbs; i++) {
1860b6490206SBarry Smith       for (j = 0; j < bs; j++) {
18619566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1862b6490206SBarry Smith         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1863b6490206SBarry Smith           for (l = 0; l < bs; l++) {
1864aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18650e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
18669371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18670e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
18689371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18690ef38995SBarry Smith             } else {
18709566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
187188685aaeSLois Curfman McInnes             }
187288685aaeSLois Curfman McInnes #else
18739566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
187488685aaeSLois Curfman McInnes #endif
18752593348eSBarry Smith           }
18762593348eSBarry Smith         }
18779566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18782593348eSBarry Smith       }
18792593348eSBarry Smith     }
18809566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1881b6490206SBarry Smith   }
18829566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
18833a40ed3dSBarry Smith   PetscFunctionReturn(0);
18842593348eSBarry Smith }
18852593348eSBarry Smith 
18869804daf3SBarry Smith #include <petscdraw.h>
18879371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) {
188877ed5343SBarry Smith   Mat               A = (Mat)Aa;
18893270192aSSatish Balay   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1890d0f46423SBarry Smith   PetscInt          row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2;
18910e6d2581SBarry Smith   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
18923f1db9ecSBarry Smith   MatScalar        *aa;
1893b0a32e0cSBarry Smith   PetscViewer       viewer;
1894b3e7f47fSJed Brown   PetscViewerFormat format;
18953270192aSSatish Balay 
18963a40ed3dSBarry Smith   PetscFunctionBegin;
18979566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
18989566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
18999566063dSJacob Faibussowitsch   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
190077ed5343SBarry Smith 
19013270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1902b3e7f47fSJed Brown 
1903b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1904d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1905383922c3SLisandro Dalcin     /* Blue for negative, Cyan for zero and  Red for positive */
1906b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
19073270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19083270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19099371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19109371c9d4SSatish Balay         y_r = y_l + 1.0;
19119371c9d4SSatish Balay         x_l = a->j[j] * bs;
19129371c9d4SSatish Balay         x_r = x_l + 1.0;
19133270192aSSatish Balay         aa  = a->a + j * bs2;
19143270192aSSatish Balay         for (k = 0; k < bs; k++) {
19153270192aSSatish Balay           for (l = 0; l < bs; l++) {
19160e6d2581SBarry Smith             if (PetscRealPart(*aa++) >= 0.) continue;
19179566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19183270192aSSatish Balay           }
19193270192aSSatish Balay         }
19203270192aSSatish Balay       }
19213270192aSSatish Balay     }
1922b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
19233270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19243270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19259371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19269371c9d4SSatish Balay         y_r = y_l + 1.0;
19279371c9d4SSatish Balay         x_l = a->j[j] * bs;
19289371c9d4SSatish Balay         x_r = x_l + 1.0;
19293270192aSSatish Balay         aa  = a->a + j * bs2;
19303270192aSSatish Balay         for (k = 0; k < bs; k++) {
19313270192aSSatish Balay           for (l = 0; l < bs; l++) {
19320e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
19339566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19343270192aSSatish Balay           }
19353270192aSSatish Balay         }
19363270192aSSatish Balay       }
19373270192aSSatish Balay     }
1938b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
19393270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19403270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19419371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19429371c9d4SSatish Balay         y_r = y_l + 1.0;
19439371c9d4SSatish Balay         x_l = a->j[j] * bs;
19449371c9d4SSatish Balay         x_r = x_l + 1.0;
19453270192aSSatish Balay         aa  = a->a + j * bs2;
19463270192aSSatish Balay         for (k = 0; k < bs; k++) {
19473270192aSSatish Balay           for (l = 0; l < bs; l++) {
19480e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
19499566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19503270192aSSatish Balay           }
19513270192aSSatish Balay         }
19523270192aSSatish Balay       }
19533270192aSSatish Balay     }
1954d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
1955b3e7f47fSJed Brown   } else {
1956b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
1957b3e7f47fSJed Brown     /* first determine max of all nonzero values */
1958b05fc000SLisandro Dalcin     PetscReal minv = 0.0, maxv = 0.0;
1959b3e7f47fSJed Brown     PetscDraw popup;
1960b3e7f47fSJed Brown 
1961b3e7f47fSJed Brown     for (i = 0; i < a->nz * a->bs2; i++) {
1962b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1963b3e7f47fSJed Brown     }
1964383922c3SLisandro Dalcin     if (minv >= maxv) maxv = minv + PETSC_SMALL;
19659566063dSJacob Faibussowitsch     PetscCall(PetscDrawGetPopup(draw, &popup));
19669566063dSJacob Faibussowitsch     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1967383922c3SLisandro Dalcin 
1968d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1969b3e7f47fSJed Brown     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1970b3e7f47fSJed Brown       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19719371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19729371c9d4SSatish Balay         y_r = y_l + 1.0;
19739371c9d4SSatish Balay         x_l = a->j[j] * bs;
19749371c9d4SSatish Balay         x_r = x_l + 1.0;
1975b3e7f47fSJed Brown         aa  = a->a + j * bs2;
1976b3e7f47fSJed Brown         for (k = 0; k < bs; k++) {
1977b3e7f47fSJed Brown           for (l = 0; l < bs; l++) {
1978383922c3SLisandro Dalcin             MatScalar v = *aa++;
1979383922c3SLisandro Dalcin             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
19809566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1981b3e7f47fSJed Brown           }
1982b3e7f47fSJed Brown         }
1983b3e7f47fSJed Brown       }
1984b3e7f47fSJed Brown     }
1985d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
1986b3e7f47fSJed Brown   }
198777ed5343SBarry Smith   PetscFunctionReturn(0);
198877ed5343SBarry Smith }
19893270192aSSatish Balay 
19909371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) {
19910e6d2581SBarry Smith   PetscReal xl, yl, xr, yr, w, h;
1992b0a32e0cSBarry Smith   PetscDraw draw;
1993ace3abfcSBarry Smith   PetscBool isnull;
19943270192aSSatish Balay 
199577ed5343SBarry Smith   PetscFunctionBegin;
19969566063dSJacob Faibussowitsch   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
19979566063dSJacob Faibussowitsch   PetscCall(PetscDrawIsNull(draw, &isnull));
199845f3bb6eSLisandro Dalcin   if (isnull) PetscFunctionReturn(0);
199977ed5343SBarry Smith 
20009371c9d4SSatish Balay   xr = A->cmap->n;
20019371c9d4SSatish Balay   yr = A->rmap->N;
20029371c9d4SSatish Balay   h  = yr / 10.0;
20039371c9d4SSatish Balay   w  = xr / 10.0;
20049371c9d4SSatish Balay   xr += w;
20059371c9d4SSatish Balay   yr += h;
20069371c9d4SSatish Balay   xl = -w;
20079371c9d4SSatish Balay   yl = -h;
20089566063dSJacob Faibussowitsch   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
20099566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
20109566063dSJacob Faibussowitsch   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
20119566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
20129566063dSJacob Faibussowitsch   PetscCall(PetscDrawSave(draw));
20133a40ed3dSBarry Smith   PetscFunctionReturn(0);
20143270192aSSatish Balay }
20153270192aSSatish Balay 
20169371c9d4SSatish Balay PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) {
2017ace3abfcSBarry Smith   PetscBool iascii, isbinary, isdraw;
20182593348eSBarry Smith 
20193a40ed3dSBarry Smith   PetscFunctionBegin;
20209566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
20219566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
20229566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
202332077d6dSBarry Smith   if (iascii) {
20249566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
20250f5bd95cSBarry Smith   } else if (isbinary) {
20269566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
20270f5bd95cSBarry Smith   } else if (isdraw) {
20289566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
20295cd90555SBarry Smith   } else {
2030a5e6ed63SBarry Smith     Mat B;
20319566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
20329566063dSJacob Faibussowitsch     PetscCall(MatView(B, viewer));
20339566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
20342593348eSBarry Smith   }
20353a40ed3dSBarry Smith   PetscFunctionReturn(0);
20362593348eSBarry Smith }
2037b6490206SBarry Smith 
20389371c9d4SSatish Balay PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) {
2039cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2040c1ac3661SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2041c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
2042d0f46423SBarry Smith   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
204397e567efSBarry Smith   MatScalar   *ap, *aa = a->a;
2044cd0e1443SSatish Balay 
20453a40ed3dSBarry Smith   PetscFunctionBegin;
20462d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over rows */
20479371c9d4SSatish Balay     row  = im[k];
20489371c9d4SSatish Balay     brow = row / bs;
20499371c9d4SSatish Balay     if (row < 0) {
20509371c9d4SSatish Balay       v += n;
20519371c9d4SSatish Balay       continue;
20529371c9d4SSatish Balay     } /* negative row */
205354c59aa7SJacob Faibussowitsch     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2054d29f2997SMatthew Woehlke     rp   = aj ? aj + ai[brow] : NULL;       /* mustn't add to NULL, that is UB */
2055d29f2997SMatthew Woehlke     ap   = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */
20562c3acbe9SBarry Smith     nrow = ailen[brow];
20572d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over columns */
20589371c9d4SSatish Balay       if (in[l] < 0) {
20599371c9d4SSatish Balay         v++;
20609371c9d4SSatish Balay         continue;
20619371c9d4SSatish Balay       } /* negative column */
206254c59aa7SJacob Faibussowitsch       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
20632d61bbb3SSatish Balay       col  = in[l];
20642d61bbb3SSatish Balay       bcol = col / bs;
20652d61bbb3SSatish Balay       cidx = col % bs;
20662d61bbb3SSatish Balay       ridx = row % bs;
20672d61bbb3SSatish Balay       high = nrow;
20682d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
20692d61bbb3SSatish Balay       while (high - low > 5) {
2070cd0e1443SSatish Balay         t = (low + high) / 2;
2071cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
2072cd0e1443SSatish Balay         else low = t;
2073cd0e1443SSatish Balay       }
2074cd0e1443SSatish Balay       for (i = low; i < high; i++) {
2075cd0e1443SSatish Balay         if (rp[i] > bcol) break;
2076cd0e1443SSatish Balay         if (rp[i] == bcol) {
20772d61bbb3SSatish Balay           *v++ = ap[bs2 * i + bs * cidx + ridx];
20782d61bbb3SSatish Balay           goto finished;
2079cd0e1443SSatish Balay         }
2080cd0e1443SSatish Balay       }
208197e567efSBarry Smith       *v++ = 0.0;
20822d61bbb3SSatish Balay     finished:;
2083cd0e1443SSatish Balay     }
2084cd0e1443SSatish Balay   }
20853a40ed3dSBarry Smith   PetscFunctionReturn(0);
2086cd0e1443SSatish Balay }
2087cd0e1443SSatish Balay 
20889371c9d4SSatish Balay PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) {
208992c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2090e2ee6c50SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2091c1ac3661SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2092d0f46423SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2093ace3abfcSBarry Smith   PetscBool          roworiented = a->roworiented;
2094dd6ea824SBarry Smith   const PetscScalar *value       = v;
20959d243f67SHong Zhang   MatScalar         *ap = NULL, *aa = a->a, *bap;
209692c4ed94SBarry Smith 
20973a40ed3dSBarry Smith   PetscFunctionBegin;
20980e324ae4SSatish Balay   if (roworiented) {
20990e324ae4SSatish Balay     stepval = (n - 1) * bs;
21000e324ae4SSatish Balay   } else {
21010e324ae4SSatish Balay     stepval = (m - 1) * bs;
21020e324ae4SSatish Balay   }
210392c4ed94SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
210492c4ed94SBarry Smith     row = im[k];
21055ef9f2a5SBarry Smith     if (row < 0) continue;
21066bdcaf15SBarry Smith     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
210792c4ed94SBarry Smith     rp = aj + ai[row];
21087dc0baabSHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[row];
210992c4ed94SBarry Smith     rmax = imax[row];
211092c4ed94SBarry Smith     nrow = ailen[row];
211192c4ed94SBarry Smith     low  = 0;
2112c71e6ed7SBarry Smith     high = nrow;
211392c4ed94SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
21145ef9f2a5SBarry Smith       if (in[l] < 0) continue;
21156bdcaf15SBarry Smith       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
211692c4ed94SBarry Smith       col = in[l];
21177dc0baabSHong Zhang       if (!A->structure_only) {
211892c4ed94SBarry Smith         if (roworiented) {
211953ef36baSBarry Smith           value = v + (k * (stepval + bs) + l) * bs;
21200e324ae4SSatish Balay         } else {
212153ef36baSBarry Smith           value = v + (l * (stepval + bs) + k) * bs;
212292c4ed94SBarry Smith         }
21237dc0baabSHong Zhang       }
212426fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
212526fbe8dcSKarl Rupp       else high = nrow;
2126e2ee6c50SBarry Smith       lastcol = col;
212792c4ed94SBarry Smith       while (high - low > 7) {
212892c4ed94SBarry Smith         t = (low + high) / 2;
212992c4ed94SBarry Smith         if (rp[t] > col) high = t;
213092c4ed94SBarry Smith         else low = t;
213192c4ed94SBarry Smith       }
213292c4ed94SBarry Smith       for (i = low; i < high; i++) {
213392c4ed94SBarry Smith         if (rp[i] > col) break;
213492c4ed94SBarry Smith         if (rp[i] == col) {
21357dc0baabSHong Zhang           if (A->structure_only) goto noinsert2;
21368a84c255SSatish Balay           bap = ap + bs2 * i;
21370e324ae4SSatish Balay           if (roworiented) {
21388a84c255SSatish Balay             if (is == ADD_VALUES) {
2139dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
21409371c9d4SSatish Balay                 for (jj = ii; jj < bs2; jj += bs) { bap[jj] += *value++; }
2141dd9472c6SBarry Smith               }
21420e324ae4SSatish Balay             } else {
2143dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
21449371c9d4SSatish Balay                 for (jj = ii; jj < bs2; jj += bs) { bap[jj] = *value++; }
2145dd9472c6SBarry Smith               }
2146dd9472c6SBarry Smith             }
21470e324ae4SSatish Balay           } else {
21480e324ae4SSatish Balay             if (is == ADD_VALUES) {
214953ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
21509371c9d4SSatish Balay                 for (jj = 0; jj < bs; jj++) { bap[jj] += value[jj]; }
215153ef36baSBarry Smith                 bap += bs;
2152dd9472c6SBarry Smith               }
21530e324ae4SSatish Balay             } else {
215453ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
21559371c9d4SSatish Balay                 for (jj = 0; jj < bs; jj++) { bap[jj] = value[jj]; }
215653ef36baSBarry Smith                 bap += bs;
21578a84c255SSatish Balay               }
2158dd9472c6SBarry Smith             }
2159dd9472c6SBarry Smith           }
2160f1241b54SBarry Smith           goto noinsert2;
216192c4ed94SBarry Smith         }
216292c4ed94SBarry Smith       }
216389280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
21645f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
21657dc0baabSHong Zhang       if (A->structure_only) {
21667dc0baabSHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
21677dc0baabSHong Zhang       } else {
2168fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
21697dc0baabSHong Zhang       }
21709371c9d4SSatish Balay       N = nrow++ - 1;
21719371c9d4SSatish Balay       high++;
217292c4ed94SBarry Smith       /* shift up all the later entries in this row */
21739566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
217492c4ed94SBarry Smith       rp[i] = col;
21757dc0baabSHong Zhang       if (!A->structure_only) {
21769566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
21778a84c255SSatish Balay         bap = ap + bs2 * i;
21780e324ae4SSatish Balay         if (roworiented) {
2179dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
21809371c9d4SSatish Balay             for (jj = ii; jj < bs2; jj += bs) { bap[jj] = *value++; }
2181dd9472c6SBarry Smith           }
21820e324ae4SSatish Balay         } else {
2183dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
21849371c9d4SSatish Balay             for (jj = 0; jj < bs; jj++) { *bap++ = *value++; }
2185dd9472c6SBarry Smith           }
2186dd9472c6SBarry Smith         }
21877dc0baabSHong Zhang       }
2188f1241b54SBarry Smith     noinsert2:;
218992c4ed94SBarry Smith       low = i;
219092c4ed94SBarry Smith     }
219192c4ed94SBarry Smith     ailen[row] = nrow;
219292c4ed94SBarry Smith   }
21933a40ed3dSBarry Smith   PetscFunctionReturn(0);
219492c4ed94SBarry Smith }
219526e093fcSHong Zhang 
21969371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) {
2197584200bdSSatish Balay   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2198580bdb30SBarry Smith   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2199d0f46423SBarry Smith   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2200c1ac3661SBarry Smith   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
22013f1db9ecSBarry Smith   MatScalar   *aa    = a->a, *ap;
22023447b6efSHong Zhang   PetscReal    ratio = 0.6;
2203584200bdSSatish Balay 
22043a40ed3dSBarry Smith   PetscFunctionBegin;
22053a40ed3dSBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2206584200bdSSatish Balay 
220743ee02c3SBarry Smith   if (m) rmax = ailen[0];
2208584200bdSSatish Balay   for (i = 1; i < mbs; i++) {
2209584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
2210584200bdSSatish Balay     fshift += imax[i - 1] - ailen[i - 1];
2211d402145bSBarry Smith     rmax = PetscMax(rmax, ailen[i]);
2212584200bdSSatish Balay     if (fshift) {
2213580bdb30SBarry Smith       ip = aj + ai[i];
2214580bdb30SBarry Smith       ap = aa + bs2 * ai[i];
2215584200bdSSatish Balay       N  = ailen[i];
22169566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(ip - fshift, ip, N));
2217*48a46eb9SPierre Jolivet       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2218672ba085SHong Zhang     }
2219584200bdSSatish Balay     ai[i] = ai[i - 1] + ailen[i - 1];
2220584200bdSSatish Balay   }
2221584200bdSSatish Balay   if (mbs) {
2222584200bdSSatish Balay     fshift += imax[mbs - 1] - ailen[mbs - 1];
2223584200bdSSatish Balay     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2224584200bdSSatish Balay   }
22257c565772SBarry Smith 
2226584200bdSSatish Balay   /* reset ilen and imax for each row */
22277c565772SBarry Smith   a->nonzerorowcnt = 0;
2228672ba085SHong Zhang   if (A->structure_only) {
22299566063dSJacob Faibussowitsch     PetscCall(PetscFree2(a->imax, a->ilen));
2230672ba085SHong Zhang   } else { /* !A->structure_only */
2231584200bdSSatish Balay     for (i = 0; i < mbs; i++) {
2232584200bdSSatish Balay       ailen[i] = imax[i] = ai[i + 1] - ai[i];
22337c565772SBarry Smith       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2234584200bdSSatish Balay     }
2235672ba085SHong Zhang   }
2236a7c10996SSatish Balay   a->nz = ai[mbs];
2237584200bdSSatish Balay 
2238584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
2239b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
2240584200bdSSatish Balay   if (fshift && a->diag) {
22419566063dSJacob Faibussowitsch     PetscCall(PetscFree(a->diag));
22429566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)A, -(mbs + 1) * sizeof(PetscInt)));
2243f4259b30SLisandro Dalcin     a->diag = NULL;
2244584200bdSSatish Balay   }
22455f80ce2aSJacob Faibussowitsch   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
22469566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
22479566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
22489566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
224926fbe8dcSKarl Rupp 
22508e58a170SBarry Smith   A->info.mallocs += a->reallocs;
2251e2f3b5e9SSatish Balay   a->reallocs         = 0;
22520e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2253647a6520SHong Zhang   a->rmax             = rmax;
2254cf4441caSHong Zhang 
2255*48a46eb9SPierre Jolivet   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
22563a40ed3dSBarry Smith   PetscFunctionReturn(0);
2257584200bdSSatish Balay }
2258584200bdSSatish Balay 
2259bea157c4SSatish Balay /*
2260bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
2261bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2262a5b23f4aSJose E. Roman    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2263bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
2264bea157c4SSatish Balay */
22659371c9d4SSatish Balay static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) {
2266c1ac3661SBarry Smith   PetscInt  i, j, k, row;
2267ace3abfcSBarry Smith   PetscBool flg;
22683a40ed3dSBarry Smith 
2269433994e6SBarry Smith   PetscFunctionBegin;
2270bea157c4SSatish Balay   for (i = 0, j = 0; i < n; j++) {
2271bea157c4SSatish Balay     row = idx[i];
2272a5b23f4aSJose E. Roman     if (row % bs != 0) { /* Not the beginning of a block */
2273bea157c4SSatish Balay       sizes[j] = 1;
2274bea157c4SSatish Balay       i++;
2275e4fda26cSSatish Balay     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2276bea157c4SSatish Balay       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2277bea157c4SSatish Balay       i++;
22786aad120cSJose E. Roman     } else { /* Beginning of the block, so check if the complete block exists */
2279bea157c4SSatish Balay       flg = PETSC_TRUE;
2280bea157c4SSatish Balay       for (k = 1; k < bs; k++) {
2281bea157c4SSatish Balay         if (row + k != idx[i + k]) { /* break in the block */
2282bea157c4SSatish Balay           flg = PETSC_FALSE;
2283bea157c4SSatish Balay           break;
2284d9b7c43dSSatish Balay         }
2285bea157c4SSatish Balay       }
2286abc0a331SBarry Smith       if (flg) { /* No break in the bs */
2287bea157c4SSatish Balay         sizes[j] = bs;
2288bea157c4SSatish Balay         i += bs;
2289bea157c4SSatish Balay       } else {
2290bea157c4SSatish Balay         sizes[j] = 1;
2291bea157c4SSatish Balay         i++;
2292bea157c4SSatish Balay       }
2293bea157c4SSatish Balay     }
2294bea157c4SSatish Balay   }
2295bea157c4SSatish Balay   *bs_max = j;
22963a40ed3dSBarry Smith   PetscFunctionReturn(0);
2297d9b7c43dSSatish Balay }
2298d9b7c43dSSatish Balay 
22999371c9d4SSatish Balay PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) {
2300d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2301f4df32b1SMatthew Knepley   PetscInt           i, j, k, count, *rows;
2302d0f46423SBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
230387828ca2SBarry Smith   PetscScalar        zero = 0.0;
23043f1db9ecSBarry Smith   MatScalar         *aa;
230597b48c8fSBarry Smith   const PetscScalar *xx;
230697b48c8fSBarry Smith   PetscScalar       *bb;
2307d9b7c43dSSatish Balay 
23083a40ed3dSBarry Smith   PetscFunctionBegin;
230997b48c8fSBarry Smith   /* fix right hand side if needed */
231097b48c8fSBarry Smith   if (x && b) {
23119566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
23129566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
23139371c9d4SSatish Balay     for (i = 0; i < is_n; i++) { bb[is_idx[i]] = diag * xx[is_idx[i]]; }
23149566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
23159566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
231697b48c8fSBarry Smith   }
231797b48c8fSBarry Smith 
2318d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2319bea157c4SSatish Balay   /* allocate memory for rows,sizes */
23209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2321bea157c4SSatish Balay 
2322563b5814SBarry Smith   /* copy IS values to rows, and sort them */
232326fbe8dcSKarl Rupp   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
23249566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(is_n, rows));
232597b48c8fSBarry Smith 
2326a9817697SBarry Smith   if (baij->keepnonzeropattern) {
232726fbe8dcSKarl Rupp     for (i = 0; i < is_n; i++) sizes[i] = 1;
2328dffd3267SBarry Smith     bs_max = is_n;
2329dffd3267SBarry Smith   } else {
23309566063dSJacob Faibussowitsch     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2331e56f5c9eSBarry Smith     A->nonzerostate++;
2332dffd3267SBarry Smith   }
2333bea157c4SSatish Balay 
2334bea157c4SSatish Balay   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2335bea157c4SSatish Balay     row = rows[j];
23365f80ce2aSJacob Faibussowitsch     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2337bea157c4SSatish Balay     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2338b31fbe3bSSatish Balay     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2339a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2340d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2341bea157c4SSatish Balay         if (baij->ilen[row / bs] > 0) {
2342bea157c4SSatish Balay           baij->ilen[row / bs]       = 1;
2343bea157c4SSatish Balay           baij->j[baij->i[row / bs]] = row / bs;
234426fbe8dcSKarl Rupp 
23459566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(aa, count * bs));
2346a07cd24cSSatish Balay         }
2347563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
2348*48a46eb9SPierre Jolivet         for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES));
2349f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2350bea157c4SSatish Balay         baij->ilen[row / bs] = 0;
2351f4df32b1SMatthew Knepley       }      /* end (diag == 0.0) */
2352bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
23536bdcaf15SBarry Smith       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2354bea157c4SSatish Balay       for (k = 0; k < count; k++) {
2355d9b7c43dSSatish Balay         aa[0] = zero;
2356d9b7c43dSSatish Balay         aa += bs;
2357d9b7c43dSSatish Balay       }
2358*48a46eb9SPierre Jolivet       if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES));
2359d9b7c43dSSatish Balay     }
2360bea157c4SSatish Balay   }
2361bea157c4SSatish Balay 
23629566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, sizes));
23639566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
23643a40ed3dSBarry Smith   PetscFunctionReturn(0);
2365d9b7c43dSSatish Balay }
23661c351548SSatish Balay 
23679371c9d4SSatish Balay PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) {
236897b48c8fSBarry Smith   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
236997b48c8fSBarry Smith   PetscInt           i, j, k, count;
237097b48c8fSBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
237197b48c8fSBarry Smith   PetscScalar        zero = 0.0;
237297b48c8fSBarry Smith   MatScalar         *aa;
237397b48c8fSBarry Smith   const PetscScalar *xx;
237497b48c8fSBarry Smith   PetscScalar       *bb;
237556777dd2SBarry Smith   PetscBool         *zeroed, vecs = PETSC_FALSE;
237697b48c8fSBarry Smith 
237797b48c8fSBarry Smith   PetscFunctionBegin;
237897b48c8fSBarry Smith   /* fix right hand side if needed */
237997b48c8fSBarry Smith   if (x && b) {
23809566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
23819566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
238256777dd2SBarry Smith     vecs = PETSC_TRUE;
238397b48c8fSBarry Smith   }
238497b48c8fSBarry Smith 
238597b48c8fSBarry Smith   /* zero the columns */
23869566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
238797b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
23885f80ce2aSJacob Faibussowitsch     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
238997b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
239097b48c8fSBarry Smith   }
239197b48c8fSBarry Smith   for (i = 0; i < A->rmap->N; i++) {
239297b48c8fSBarry Smith     if (!zeroed[i]) {
239397b48c8fSBarry Smith       row = i / bs;
239497b48c8fSBarry Smith       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
239597b48c8fSBarry Smith         for (k = 0; k < bs; k++) {
239697b48c8fSBarry Smith           col = bs * baij->j[j] + k;
239797b48c8fSBarry Smith           if (zeroed[col]) {
239897b48c8fSBarry Smith             aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
239956777dd2SBarry Smith             if (vecs) bb[i] -= aa[0] * xx[col];
240097b48c8fSBarry Smith             aa[0] = 0.0;
240197b48c8fSBarry Smith           }
240297b48c8fSBarry Smith         }
240397b48c8fSBarry Smith       }
240456777dd2SBarry Smith     } else if (vecs) bb[i] = diag * xx[i];
240597b48c8fSBarry Smith   }
24069566063dSJacob Faibussowitsch   PetscCall(PetscFree(zeroed));
240756777dd2SBarry Smith   if (vecs) {
24089566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
24099566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
241056777dd2SBarry Smith   }
241197b48c8fSBarry Smith 
241297b48c8fSBarry Smith   /* zero the rows */
241397b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
241497b48c8fSBarry Smith     row   = is_idx[i];
241597b48c8fSBarry Smith     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
241697b48c8fSBarry Smith     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
241797b48c8fSBarry Smith     for (k = 0; k < count; k++) {
241897b48c8fSBarry Smith       aa[0] = zero;
241997b48c8fSBarry Smith       aa += bs;
242097b48c8fSBarry Smith     }
2421dbbe0bcdSBarry Smith     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
242297b48c8fSBarry Smith   }
24239566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
242497b48c8fSBarry Smith   PetscFunctionReturn(0);
242597b48c8fSBarry Smith }
242697b48c8fSBarry Smith 
24279371c9d4SSatish Balay PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) {
24282d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2429e2ee6c50SBarry Smith   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2430c1ac3661SBarry Smith   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2431d0f46423SBarry Smith   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2432c1ac3661SBarry Smith   PetscInt     ridx, cidx, bs2                 = a->bs2;
2433ace3abfcSBarry Smith   PetscBool    roworiented = a->roworiented;
2434d8cdefa3SHong Zhang   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
24352d61bbb3SSatish Balay 
24362d61bbb3SSatish Balay   PetscFunctionBegin;
24372d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over added rows */
2438085a36d4SBarry Smith     row  = im[k];
2439085a36d4SBarry Smith     brow = row / bs;
24405ef9f2a5SBarry Smith     if (row < 0) continue;
24416bdcaf15SBarry Smith     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
24422d61bbb3SSatish Balay     rp = aj + ai[brow];
2443672ba085SHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[brow];
24442d61bbb3SSatish Balay     rmax = imax[brow];
24452d61bbb3SSatish Balay     nrow = ailen[brow];
24462d61bbb3SSatish Balay     low  = 0;
2447c71e6ed7SBarry Smith     high = nrow;
24482d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over added columns */
24495ef9f2a5SBarry Smith       if (in[l] < 0) continue;
24506bdcaf15SBarry Smith       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
24519371c9d4SSatish Balay       col  = in[l];
24529371c9d4SSatish Balay       bcol = col / bs;
24539371c9d4SSatish Balay       ridx = row % bs;
24549371c9d4SSatish Balay       cidx = col % bs;
2455672ba085SHong Zhang       if (!A->structure_only) {
24562d61bbb3SSatish Balay         if (roworiented) {
24575ef9f2a5SBarry Smith           value = v[l + k * n];
24582d61bbb3SSatish Balay         } else {
24592d61bbb3SSatish Balay           value = v[k + l * m];
24602d61bbb3SSatish Balay         }
2461672ba085SHong Zhang       }
24629371c9d4SSatish Balay       if (col <= lastcol) low = 0;
24639371c9d4SSatish Balay       else high = nrow;
2464e2ee6c50SBarry Smith       lastcol = col;
24652d61bbb3SSatish Balay       while (high - low > 7) {
24662d61bbb3SSatish Balay         t = (low + high) / 2;
24672d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
24682d61bbb3SSatish Balay         else low = t;
24692d61bbb3SSatish Balay       }
24702d61bbb3SSatish Balay       for (i = low; i < high; i++) {
24712d61bbb3SSatish Balay         if (rp[i] > bcol) break;
24722d61bbb3SSatish Balay         if (rp[i] == bcol) {
24732d61bbb3SSatish Balay           bap = ap + bs2 * i + bs * cidx + ridx;
2474672ba085SHong Zhang           if (!A->structure_only) {
24752d61bbb3SSatish Balay             if (is == ADD_VALUES) *bap += value;
24762d61bbb3SSatish Balay             else *bap = value;
2477672ba085SHong Zhang           }
24782d61bbb3SSatish Balay           goto noinsert1;
24792d61bbb3SSatish Balay         }
24802d61bbb3SSatish Balay       }
24812d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
24825f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2483672ba085SHong Zhang       if (A->structure_only) {
2484672ba085SHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2485672ba085SHong Zhang       } else {
2486fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2487672ba085SHong Zhang       }
24889371c9d4SSatish Balay       N = nrow++ - 1;
24899371c9d4SSatish Balay       high++;
24902d61bbb3SSatish Balay       /* shift up all the later entries in this row */
24919566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
24922d61bbb3SSatish Balay       rp[i] = bcol;
2493580bdb30SBarry Smith       if (!A->structure_only) {
24949566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
24959566063dSJacob Faibussowitsch         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2496580bdb30SBarry Smith         ap[bs2 * i + bs * cidx + ridx] = value;
2497580bdb30SBarry Smith       }
2498085a36d4SBarry Smith       a->nz++;
2499e56f5c9eSBarry Smith       A->nonzerostate++;
25002d61bbb3SSatish Balay     noinsert1:;
25012d61bbb3SSatish Balay       low = i;
25022d61bbb3SSatish Balay     }
25032d61bbb3SSatish Balay     ailen[brow] = nrow;
25042d61bbb3SSatish Balay   }
25052d61bbb3SSatish Balay   PetscFunctionReturn(0);
25062d61bbb3SSatish Balay }
25072d61bbb3SSatish Balay 
25089371c9d4SSatish Balay PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) {
25092d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
25102d61bbb3SSatish Balay   Mat          outA;
2511ace3abfcSBarry Smith   PetscBool    row_identity, col_identity;
25122d61bbb3SSatish Balay 
25132d61bbb3SSatish Balay   PetscFunctionBegin;
25145f80ce2aSJacob Faibussowitsch   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
25159566063dSJacob Faibussowitsch   PetscCall(ISIdentity(row, &row_identity));
25169566063dSJacob Faibussowitsch   PetscCall(ISIdentity(col, &col_identity));
25175f80ce2aSJacob Faibussowitsch   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
25182d61bbb3SSatish Balay 
25192d61bbb3SSatish Balay   outA            = inA;
2520d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
25219566063dSJacob Faibussowitsch   PetscCall(PetscFree(inA->solvertype));
25229566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
25232d61bbb3SSatish Balay 
25249566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2525cf242676SKris Buschelman 
25269566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)row));
25279566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
2528c3122656SLisandro Dalcin   a->row = row;
25299566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)col));
25309566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
2531c3122656SLisandro Dalcin   a->col = col;
2532c38d4ed2SBarry Smith 
2533c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
25349566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
25359566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
25369566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)inA, (PetscObject)a->icol));
2537c38d4ed2SBarry Smith 
25389566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2539c38d4ed2SBarry Smith   if (!a->solve_work) {
25409566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
25419566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)inA, (inA->rmap->N + inA->rmap->bs) * sizeof(PetscScalar)));
2542c38d4ed2SBarry Smith   }
25439566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(outA, inA, info));
25442d61bbb3SSatish Balay   PetscFunctionReturn(0);
25452d61bbb3SSatish Balay }
2546d9b7c43dSSatish Balay 
25479371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, PetscInt *indices) {
254827a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2549bdb1c0e1SJed Brown   PetscInt     i, nz, mbs;
255027a8da17SBarry Smith 
255127a8da17SBarry Smith   PetscFunctionBegin;
2552b32cb4a7SJed Brown   nz  = baij->maxnz;
2553bdb1c0e1SJed Brown   mbs = baij->mbs;
25549371c9d4SSatish Balay   for (i = 0; i < nz; i++) { baij->j[i] = indices[i]; }
255527a8da17SBarry Smith   baij->nz = nz;
25569371c9d4SSatish Balay   for (i = 0; i < mbs; i++) { baij->ilen[i] = baij->imax[i]; }
255727a8da17SBarry Smith   PetscFunctionReturn(0);
255827a8da17SBarry Smith }
255927a8da17SBarry Smith 
256027a8da17SBarry Smith /*@
256127a8da17SBarry Smith     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
256227a8da17SBarry Smith        in the matrix.
256327a8da17SBarry Smith 
256427a8da17SBarry Smith   Input Parameters:
256527a8da17SBarry Smith +  mat - the SeqBAIJ matrix
256627a8da17SBarry Smith -  indices - the column indices
256727a8da17SBarry Smith 
256815091d37SBarry Smith   Level: advanced
256915091d37SBarry Smith 
257027a8da17SBarry Smith   Notes:
257127a8da17SBarry Smith     This can be called if you have precomputed the nonzero structure of the
257227a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
257327a8da17SBarry Smith   of the MatSetValues() operation.
257427a8da17SBarry Smith 
257527a8da17SBarry Smith     You MUST have set the correct numbers of nonzeros per row in the call to
2576d1be2dadSMatthew Knepley   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
257727a8da17SBarry Smith 
257827a8da17SBarry Smith     MUST be called before any calls to MatSetValues();
257927a8da17SBarry Smith 
258027a8da17SBarry Smith @*/
25819371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) {
258227a8da17SBarry Smith   PetscFunctionBegin;
25830700a824SBarry Smith   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2584dadcf809SJacob Faibussowitsch   PetscValidIntPointer(indices, 2);
2585cac4c232SBarry Smith   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices));
258627a8da17SBarry Smith   PetscFunctionReturn(0);
258727a8da17SBarry Smith }
258827a8da17SBarry Smith 
25899371c9d4SSatish Balay PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) {
2590273d9f13SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2591c1ac3661SBarry Smith   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2592273d9f13SBarry Smith   PetscReal    atmp;
259387828ca2SBarry Smith   PetscScalar *x, zero = 0.0;
2594273d9f13SBarry Smith   MatScalar   *aa;
2595c1ac3661SBarry Smith   PetscInt     ncols, brow, krow, kcol;
2596273d9f13SBarry Smith 
2597273d9f13SBarry Smith   PetscFunctionBegin;
25985f80ce2aSJacob Faibussowitsch   /* why is this not a macro???????????????????????????????????????????????????????????????? */
25995f80ce2aSJacob Faibussowitsch   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2600d0f46423SBarry Smith   bs  = A->rmap->bs;
2601273d9f13SBarry Smith   aa  = a->a;
2602273d9f13SBarry Smith   ai  = a->i;
2603273d9f13SBarry Smith   aj  = a->j;
2604273d9f13SBarry Smith   mbs = a->mbs;
2605273d9f13SBarry Smith 
26069566063dSJacob Faibussowitsch   PetscCall(VecSet(v, zero));
26079566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
26089566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(v, &n));
26095f80ce2aSJacob Faibussowitsch   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2610273d9f13SBarry Smith   for (i = 0; i < mbs; i++) {
26119371c9d4SSatish Balay     ncols = ai[1] - ai[0];
26129371c9d4SSatish Balay     ai++;
2613273d9f13SBarry Smith     brow = bs * i;
2614273d9f13SBarry Smith     for (j = 0; j < ncols; j++) {
2615273d9f13SBarry Smith       for (kcol = 0; kcol < bs; kcol++) {
2616273d9f13SBarry Smith         for (krow = 0; krow < bs; krow++) {
26179371c9d4SSatish Balay           atmp = PetscAbsScalar(*aa);
26189371c9d4SSatish Balay           aa++;
2619273d9f13SBarry Smith           row = brow + krow; /* row index */
26209371c9d4SSatish Balay           if (PetscAbsScalar(x[row]) < atmp) {
26219371c9d4SSatish Balay             x[row] = atmp;
26229371c9d4SSatish Balay             if (idx) idx[row] = bs * (*aj) + kcol;
26239371c9d4SSatish Balay           }
2624273d9f13SBarry Smith         }
2625273d9f13SBarry Smith       }
2626273d9f13SBarry Smith       aj++;
2627273d9f13SBarry Smith     }
2628273d9f13SBarry Smith   }
26299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
2630273d9f13SBarry Smith   PetscFunctionReturn(0);
2631273d9f13SBarry Smith }
2632273d9f13SBarry Smith 
26339371c9d4SSatish Balay PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) {
26343c896bc6SHong Zhang   PetscFunctionBegin;
26353c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
26363c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
26373c896bc6SHong Zhang     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
26383c896bc6SHong Zhang     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2639d88c0aacSHong Zhang     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
26403c896bc6SHong Zhang 
26415f80ce2aSJacob Faibussowitsch     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
26425f80ce2aSJacob Faibussowitsch     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
26439566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
26449566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)B));
26453c896bc6SHong Zhang   } else {
26469566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
26473c896bc6SHong Zhang   }
26483c896bc6SHong Zhang   PetscFunctionReturn(0);
26493c896bc6SHong Zhang }
26503c896bc6SHong Zhang 
26519371c9d4SSatish Balay PetscErrorCode MatSetUp_SeqBAIJ(Mat A) {
2652273d9f13SBarry Smith   PetscFunctionBegin;
26539566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL));
2654273d9f13SBarry Smith   PetscFunctionReturn(0);
2655273d9f13SBarry Smith }
2656273d9f13SBarry Smith 
26579371c9d4SSatish Balay static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) {
2658f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
26596e111a19SKarl Rupp 
2660f2a5309cSSatish Balay   PetscFunctionBegin;
2661f2a5309cSSatish Balay   *array = a->a;
2662f2a5309cSSatish Balay   PetscFunctionReturn(0);
2663f2a5309cSSatish Balay }
2664f2a5309cSSatish Balay 
26659371c9d4SSatish Balay static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) {
2666f2a5309cSSatish Balay   PetscFunctionBegin;
2667cda14afcSprj-   *array = NULL;
2668f2a5309cSSatish Balay   PetscFunctionReturn(0);
2669f2a5309cSSatish Balay }
2670f2a5309cSSatish Balay 
26719371c9d4SSatish Balay PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) {
2672b264fe52SHong Zhang   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
267352768537SHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
267452768537SHong Zhang   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
267552768537SHong Zhang 
267652768537SHong Zhang   PetscFunctionBegin;
267752768537SHong Zhang   /* Set the number of nonzeros in the new matrix */
26789566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
267952768537SHong Zhang   PetscFunctionReturn(0);
268052768537SHong Zhang }
268152768537SHong Zhang 
26829371c9d4SSatish Balay PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
268342ee4b1aSHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
268431ce2d13SHong Zhang   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2685e838b9e7SJed Brown   PetscBLASInt one = 1;
268642ee4b1aSHong Zhang 
268742ee4b1aSHong Zhang   PetscFunctionBegin;
2688134adf20SPierre Jolivet   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2689134adf20SPierre Jolivet     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2690134adf20SPierre Jolivet     if (e) {
26919566063dSJacob Faibussowitsch       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2692134adf20SPierre Jolivet       if (e) {
26939566063dSJacob Faibussowitsch         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2694134adf20SPierre Jolivet         if (e) str = SAME_NONZERO_PATTERN;
2695134adf20SPierre Jolivet       }
2696134adf20SPierre Jolivet     }
269754c59aa7SJacob Faibussowitsch     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2698134adf20SPierre Jolivet   }
269942ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2700f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2701c5df96a5SBarry Smith     PetscBLASInt bnz;
27029566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2703792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
27049566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2705ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
27069566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
270742ee4b1aSHong Zhang   } else {
270852768537SHong Zhang     Mat       B;
270952768537SHong Zhang     PetscInt *nnz;
271054c59aa7SJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
27119566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
27129566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
27139566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
27149566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
27159566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
27169566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
27179566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
27189566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
27199566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
27209566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
27219566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
272242ee4b1aSHong Zhang   }
272342ee4b1aSHong Zhang   PetscFunctionReturn(0);
272442ee4b1aSHong Zhang }
272542ee4b1aSHong Zhang 
27269371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) {
27272726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX)
27282726fb6dSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27292726fb6dSPierre Jolivet   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
27302726fb6dSPierre Jolivet   MatScalar   *aa = a->a;
27312726fb6dSPierre Jolivet 
27322726fb6dSPierre Jolivet   PetscFunctionBegin;
27332726fb6dSPierre Jolivet   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
27342726fb6dSPierre Jolivet #else
27352726fb6dSPierre Jolivet   PetscFunctionBegin;
27362726fb6dSPierre Jolivet #endif
27372726fb6dSPierre Jolivet   PetscFunctionReturn(0);
27382726fb6dSPierre Jolivet }
27392726fb6dSPierre Jolivet 
27409371c9d4SSatish Balay PetscErrorCode MatRealPart_SeqBAIJ(Mat A) {
274199cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
274299cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2743dd6ea824SBarry Smith   MatScalar   *aa = a->a;
274499cafbc1SBarry Smith 
274599cafbc1SBarry Smith   PetscFunctionBegin;
274699cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
274799cafbc1SBarry Smith   PetscFunctionReturn(0);
274899cafbc1SBarry Smith }
274999cafbc1SBarry Smith 
27509371c9d4SSatish Balay PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) {
275199cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
275299cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2753dd6ea824SBarry Smith   MatScalar   *aa = a->a;
275499cafbc1SBarry Smith 
275599cafbc1SBarry Smith   PetscFunctionBegin;
275699cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
275799cafbc1SBarry Smith   PetscFunctionReturn(0);
275899cafbc1SBarry Smith }
275999cafbc1SBarry Smith 
27603acb8795SBarry Smith /*
27612479783cSJose E. Roman     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
27623acb8795SBarry Smith */
27639371c9d4SSatish Balay PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
27643acb8795SBarry Smith   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
27653acb8795SBarry Smith   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
27663acb8795SBarry Smith   PetscInt     nz = a->i[m], row, *jj, mr, col;
27673acb8795SBarry Smith 
27683acb8795SBarry Smith   PetscFunctionBegin;
27693acb8795SBarry Smith   *nn = n;
27703acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
27715f80ce2aSJacob Faibussowitsch   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
27729566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
27739566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
27749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
27753acb8795SBarry Smith   jj = a->j;
27769371c9d4SSatish Balay   for (i = 0; i < nz; i++) { collengths[jj[i]]++; }
27773acb8795SBarry Smith   cia[0] = oshift;
27789371c9d4SSatish Balay   for (i = 0; i < n; i++) { cia[i + 1] = cia[i] + collengths[i]; }
27799566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
27803acb8795SBarry Smith   jj = a->j;
27813acb8795SBarry Smith   for (row = 0; row < m; row++) {
27823acb8795SBarry Smith     mr = a->i[row + 1] - a->i[row];
27833acb8795SBarry Smith     for (i = 0; i < mr; i++) {
27843acb8795SBarry Smith       col = *jj++;
278526fbe8dcSKarl Rupp 
27863acb8795SBarry Smith       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
27873acb8795SBarry Smith     }
27883acb8795SBarry Smith   }
27899566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
27909371c9d4SSatish Balay   *ia = cia;
27919371c9d4SSatish Balay   *ja = cja;
27923acb8795SBarry Smith   PetscFunctionReturn(0);
27933acb8795SBarry Smith }
27943acb8795SBarry Smith 
27959371c9d4SSatish Balay PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
27963acb8795SBarry Smith   PetscFunctionBegin;
27973acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
27989566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ia));
27999566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ja));
28003acb8795SBarry Smith   PetscFunctionReturn(0);
28013acb8795SBarry Smith }
28023acb8795SBarry Smith 
2803525d23c0SHong Zhang /*
2804525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2805525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2806040ebd07SHong Zhang  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2807525d23c0SHong Zhang  */
28089371c9d4SSatish Balay PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) {
2809525d23c0SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2810c0349474SHong Zhang   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2811525d23c0SHong Zhang   PetscInt     nz = a->i[m], row, *jj, mr, col;
2812525d23c0SHong Zhang   PetscInt    *cspidx;
2813f6d58c54SBarry Smith 
2814f6d58c54SBarry Smith   PetscFunctionBegin;
2815525d23c0SHong Zhang   *nn = n;
2816525d23c0SHong Zhang   if (!ia) PetscFunctionReturn(0);
2817f6d58c54SBarry Smith 
28189566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28199566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28219566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cspidx));
2822525d23c0SHong Zhang   jj = a->j;
28239371c9d4SSatish Balay   for (i = 0; i < nz; i++) { collengths[jj[i]]++; }
2824525d23c0SHong Zhang   cia[0] = oshift;
28259371c9d4SSatish Balay   for (i = 0; i < n; i++) { cia[i + 1] = cia[i] + collengths[i]; }
28269566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
2827525d23c0SHong Zhang   jj = a->j;
2828525d23c0SHong Zhang   for (row = 0; row < m; row++) {
2829525d23c0SHong Zhang     mr = a->i[row + 1] - a->i[row];
2830525d23c0SHong Zhang     for (i = 0; i < mr; i++) {
2831525d23c0SHong Zhang       col                                         = *jj++;
2832525d23c0SHong Zhang       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2833525d23c0SHong Zhang       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2834525d23c0SHong Zhang     }
2835525d23c0SHong Zhang   }
28369566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
2837071fcb05SBarry Smith   *ia    = cia;
2838071fcb05SBarry Smith   *ja    = cja;
2839525d23c0SHong Zhang   *spidx = cspidx;
2840525d23c0SHong Zhang   PetscFunctionReturn(0);
2841f6d58c54SBarry Smith }
2842f6d58c54SBarry Smith 
28439371c9d4SSatish Balay PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) {
2844525d23c0SHong Zhang   PetscFunctionBegin;
28459566063dSJacob Faibussowitsch   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
28469566063dSJacob Faibussowitsch   PetscCall(PetscFree(*spidx));
2847f6d58c54SBarry Smith   PetscFunctionReturn(0);
2848f6d58c54SBarry Smith }
284999cafbc1SBarry Smith 
28509371c9d4SSatish Balay PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) {
28517d68702bSBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
28527d68702bSBarry Smith 
28537d68702bSBarry Smith   PetscFunctionBegin;
2854*48a46eb9SPierre Jolivet   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
28559566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
28567d68702bSBarry Smith   PetscFunctionReturn(0);
28577d68702bSBarry Smith }
28587d68702bSBarry Smith 
28592593348eSBarry Smith /* -------------------------------------------------------------------*/
/*
   MatOps_Values - table of function pointers for the SeqBAIJ matrix implementation.

   The initializer is positional: each entry lands in the struct _MatOps member at the same
   position, so entries must never be inserted, removed or reordered without keeping the count
   intact. The numeric comments (4, 10, 15, 20, 24, 29, ...) label the slot index of the entry
   they precede and exist only to make counting possible. NULL means this table supplies no
   routine for that slot.

   MatSeqBAIJSetPreallocation_SeqBAIJ() in this file assigns B->ops->mult and B->ops->multadd
   according to the block size (unless -mat_no_unroll is given) and sets B->ops->sor.
   NOTE(review): presumably this table is what a new SeqBAIJ matrix's ops start from, making the
   MatMult_SeqBAIJ_N / MatMultAdd_SeqBAIJ_N entries the generic defaults - the code that installs
   the table is not in view here; confirm.
*/
28609371c9d4SSatish Balay static struct _MatOps MatOps_Values = {
28619371c9d4SSatish Balay   MatSetValues_SeqBAIJ,
2862cc2dc46cSBarry Smith   MatGetRow_SeqBAIJ,
2863cc2dc46cSBarry Smith   MatRestoreRow_SeqBAIJ,
2864cc2dc46cSBarry Smith   MatMult_SeqBAIJ_N,
286597304618SKris Buschelman   /* 4*/ MatMultAdd_SeqBAIJ_N,
28667c922b88SBarry Smith   MatMultTranspose_SeqBAIJ,
28677c922b88SBarry Smith   MatMultTransposeAdd_SeqBAIJ,
2868f4259b30SLisandro Dalcin   NULL,
2869f4259b30SLisandro Dalcin   NULL,
2870f4259b30SLisandro Dalcin   NULL,
2871f4259b30SLisandro Dalcin   /* 10*/ NULL,
2872cc2dc46cSBarry Smith   MatLUFactor_SeqBAIJ,
2873f4259b30SLisandro Dalcin   NULL,
2874f4259b30SLisandro Dalcin   NULL,
2875f2501298SSatish Balay   MatTranspose_SeqBAIJ,
287697304618SKris Buschelman   /* 15*/ MatGetInfo_SeqBAIJ,
2877cc2dc46cSBarry Smith   MatEqual_SeqBAIJ,
2878cc2dc46cSBarry Smith   MatGetDiagonal_SeqBAIJ,
2879cc2dc46cSBarry Smith   MatDiagonalScale_SeqBAIJ,
2880cc2dc46cSBarry Smith   MatNorm_SeqBAIJ,
2881f4259b30SLisandro Dalcin   /* 20*/ NULL,
2882cc2dc46cSBarry Smith   MatAssemblyEnd_SeqBAIJ,
2883cc2dc46cSBarry Smith   MatSetOption_SeqBAIJ,
2884cc2dc46cSBarry Smith   MatZeroEntries_SeqBAIJ,
2885d519adbfSMatthew Knepley   /* 24*/ MatZeroRows_SeqBAIJ,
2886f4259b30SLisandro Dalcin   NULL,
2887f4259b30SLisandro Dalcin   NULL,
2888f4259b30SLisandro Dalcin   NULL,
2889f4259b30SLisandro Dalcin   NULL,
28904994cf47SJed Brown   /* 29*/ MatSetUp_SeqBAIJ,
2891f4259b30SLisandro Dalcin   NULL,
2892f4259b30SLisandro Dalcin   NULL,
2893f4259b30SLisandro Dalcin   NULL,
2894f4259b30SLisandro Dalcin   NULL,
2895d519adbfSMatthew Knepley   /* 34*/ MatDuplicate_SeqBAIJ,
2896f4259b30SLisandro Dalcin   NULL,
2897f4259b30SLisandro Dalcin   NULL,
2898cc2dc46cSBarry Smith   MatILUFactor_SeqBAIJ,
2899f4259b30SLisandro Dalcin   NULL,
2900d519adbfSMatthew Knepley   /* 39*/ MatAXPY_SeqBAIJ,
29017dae84e0SHong Zhang   MatCreateSubMatrices_SeqBAIJ,
2902cc2dc46cSBarry Smith   MatIncreaseOverlap_SeqBAIJ,
2903cc2dc46cSBarry Smith   MatGetValues_SeqBAIJ,
29043c896bc6SHong Zhang   MatCopy_SeqBAIJ,
2905f4259b30SLisandro Dalcin   /* 44*/ NULL,
2906cc2dc46cSBarry Smith   MatScale_SeqBAIJ,
29077d68702bSBarry Smith   MatShift_SeqBAIJ,
2908f4259b30SLisandro Dalcin   NULL,
290997b48c8fSBarry Smith   MatZeroRowsColumns_SeqBAIJ,
2910f4259b30SLisandro Dalcin   /* 49*/ NULL,
29113b2fbd54SBarry Smith   MatGetRowIJ_SeqBAIJ,
291292c4ed94SBarry Smith   MatRestoreRowIJ_SeqBAIJ,
29133acb8795SBarry Smith   MatGetColumnIJ_SeqBAIJ,
29143acb8795SBarry Smith   MatRestoreColumnIJ_SeqBAIJ,
291593dfae19SHong Zhang   /* 54*/ MatFDColoringCreate_SeqXAIJ,
2916f4259b30SLisandro Dalcin   NULL,
2917f4259b30SLisandro Dalcin   NULL,
2918090001bdSToby Isaac   NULL,
2919d3825aa8SBarry Smith   MatSetValuesBlocked_SeqBAIJ,
29207dae84e0SHong Zhang   /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2921b9b97703SBarry Smith   MatDestroy_SeqBAIJ,
2922b9b97703SBarry Smith   MatView_SeqBAIJ,
2923f4259b30SLisandro Dalcin   NULL,
2924f4259b30SLisandro Dalcin   NULL,
2925f4259b30SLisandro Dalcin   /* 64*/ NULL,
2926f4259b30SLisandro Dalcin   NULL,
2927f4259b30SLisandro Dalcin   NULL,
2928f4259b30SLisandro Dalcin   NULL,
2929f4259b30SLisandro Dalcin   NULL,
2930d519adbfSMatthew Knepley   /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2931f4259b30SLisandro Dalcin   NULL,
2932c87e5d42SMatthew Knepley   MatConvert_Basic,
2933f4259b30SLisandro Dalcin   NULL,
2934f4259b30SLisandro Dalcin   NULL,
2935f4259b30SLisandro Dalcin   /* 74*/ NULL,
2936f6d58c54SBarry Smith   MatFDColoringApply_BAIJ,
2937f4259b30SLisandro Dalcin   NULL,
2938f4259b30SLisandro Dalcin   NULL,
2939f4259b30SLisandro Dalcin   NULL,
2940f4259b30SLisandro Dalcin   /* 79*/ NULL,
2941f4259b30SLisandro Dalcin   NULL,
2942f4259b30SLisandro Dalcin   NULL,
2943f4259b30SLisandro Dalcin   NULL,
29445bba2384SShri Abhyankar   MatLoad_SeqBAIJ,
2945f4259b30SLisandro Dalcin   /* 84*/ NULL,
2946f4259b30SLisandro Dalcin   NULL,
2947f4259b30SLisandro Dalcin   NULL,
2948f4259b30SLisandro Dalcin   NULL,
2949f4259b30SLisandro Dalcin   NULL,
2950f4259b30SLisandro Dalcin   /* 89*/ NULL,
2951f4259b30SLisandro Dalcin   NULL,
2952f4259b30SLisandro Dalcin   NULL,
2953f4259b30SLisandro Dalcin   NULL,
2954f4259b30SLisandro Dalcin   NULL,
2955f4259b30SLisandro Dalcin   /* 94*/ NULL,
2956f4259b30SLisandro Dalcin   NULL,
2957f4259b30SLisandro Dalcin   NULL,
2958f4259b30SLisandro Dalcin   NULL,
2959f4259b30SLisandro Dalcin   NULL,
2960f4259b30SLisandro Dalcin   /* 99*/ NULL,
2961f4259b30SLisandro Dalcin   NULL,
2962f4259b30SLisandro Dalcin   NULL,
29632726fb6dSPierre Jolivet   MatConjugate_SeqBAIJ,
2964f4259b30SLisandro Dalcin   NULL,
2965f4259b30SLisandro Dalcin   /*104*/ NULL,
296699cafbc1SBarry Smith   MatRealPart_SeqBAIJ,
29672af78befSBarry Smith   MatImaginaryPart_SeqBAIJ,
2968f4259b30SLisandro Dalcin   NULL,
2969f4259b30SLisandro Dalcin   NULL,
2970f4259b30SLisandro Dalcin   /*109*/ NULL,
2971f4259b30SLisandro Dalcin   NULL,
2972f4259b30SLisandro Dalcin   NULL,
2973f4259b30SLisandro Dalcin   NULL,
2974547795f9SHong Zhang   MatMissingDiagonal_SeqBAIJ,
2975f4259b30SLisandro Dalcin   /*114*/ NULL,
2976f4259b30SLisandro Dalcin   NULL,
2977f4259b30SLisandro Dalcin   NULL,
2978f4259b30SLisandro Dalcin   NULL,
2979f4259b30SLisandro Dalcin   NULL,
2980f4259b30SLisandro Dalcin   /*119*/ NULL,
2981f4259b30SLisandro Dalcin   NULL,
2982547795f9SHong Zhang   MatMultHermitianTranspose_SeqBAIJ,
2983d6037b41SHong Zhang   MatMultHermitianTransposeAdd_SeqBAIJ,
2984f4259b30SLisandro Dalcin   NULL,
2985f4259b30SLisandro Dalcin   /*124*/ NULL,
2986857cbf51SRichard Tran Mills   MatGetColumnReductions_SeqBAIJ,
29873964eb88SJed Brown   MatInvertBlockDiagonal_SeqBAIJ,
2988f4259b30SLisandro Dalcin   NULL,
2989f4259b30SLisandro Dalcin   NULL,
2990f4259b30SLisandro Dalcin   /*129*/ NULL,
2991f4259b30SLisandro Dalcin   NULL,
2992f4259b30SLisandro Dalcin   NULL,
2993f4259b30SLisandro Dalcin   NULL,
2994f4259b30SLisandro Dalcin   NULL,
2995f4259b30SLisandro Dalcin   /*134*/ NULL,
2996f4259b30SLisandro Dalcin   NULL,
2997f4259b30SLisandro Dalcin   NULL,
2998f4259b30SLisandro Dalcin   NULL,
2999f4259b30SLisandro Dalcin   NULL,
300046533700Sstefano_zampini   /*139*/ MatSetBlockSizes_Default,
3001f4259b30SLisandro Dalcin   NULL,
3002f4259b30SLisandro Dalcin   NULL,
3003bdf6f3fcSHong Zhang   MatFDColoringSetUp_SeqXAIJ,
3004f4259b30SLisandro Dalcin   NULL,
300586e85357SHong Zhang   /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3006d70f29a3SPierre Jolivet   MatDestroySubMatrices_SeqBAIJ,
3007d70f29a3SPierre Jolivet   NULL,
300899a7f59eSMark Adams   NULL,
300999a7f59eSMark Adams   NULL,
30107fb60732SBarry Smith   NULL,
30117fb60732SBarry Smith   /*150*/ NULL,
301299cafbc1SBarry Smith };
30132593348eSBarry Smith 
30149371c9d4SSatish Balay PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) {
30153e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
30168ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
30173e90b805SBarry Smith 
30183e90b805SBarry Smith   PetscFunctionBegin;
30195f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
30203e90b805SBarry Smith 
30213e90b805SBarry Smith   /* allocate space for values if not already there */
30223e90b805SBarry Smith   if (!aij->saved_values) {
30239566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
30249566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)mat, (nz + 1) * sizeof(PetscScalar)));
30253e90b805SBarry Smith   }
30263e90b805SBarry Smith 
30273e90b805SBarry Smith   /* copy values over */
30289566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
30293e90b805SBarry Smith   PetscFunctionReturn(0);
30303e90b805SBarry Smith }
30313e90b805SBarry Smith 
30329371c9d4SSatish Balay PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) {
30333e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
30348ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
30353e90b805SBarry Smith 
30363e90b805SBarry Smith   PetscFunctionBegin;
30375f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
30385f80ce2aSJacob Faibussowitsch   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
30393e90b805SBarry Smith 
30403e90b805SBarry Smith   /* copy values over */
30419566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
30423e90b805SBarry Smith   PetscFunctionReturn(0);
30433e90b805SBarry Smith }
30443e90b805SBarry Smith 
3045cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3046cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3047273d9f13SBarry Smith 
30489371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) {
3049a23d5eceSKris Buschelman   Mat_SeqBAIJ *b;
3050535b19f3SBarry Smith   PetscInt     i, mbs, nbs, bs2;
30518afaa268SBarry Smith   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3052a23d5eceSKris Buschelman 
3053a23d5eceSKris Buschelman   PetscFunctionBegin;
30542576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3055ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
3056ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
3057ab93d7beSBarry Smith     nz             = 0;
3058ab93d7beSBarry Smith   }
30598c07d4e3SBarry Smith 
30609566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
30619566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
30629566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
30639566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3064899cda47SBarry Smith 
3065899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
3066899cda47SBarry Smith 
3067d0f46423SBarry Smith   mbs = B->rmap->n / bs;
3068d0f46423SBarry Smith   nbs = B->cmap->n / bs;
3069a23d5eceSKris Buschelman   bs2 = bs * bs;
3070a23d5eceSKris Buschelman 
30715f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3072a23d5eceSKris Buschelman 
3073a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
30745f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3075a23d5eceSKris Buschelman   if (nnz) {
3076a23d5eceSKris Buschelman     for (i = 0; i < mbs; i++) {
30775f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
30785f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3079a23d5eceSKris Buschelman     }
3080a23d5eceSKris Buschelman   }
3081a23d5eceSKris Buschelman 
3082a23d5eceSKris Buschelman   b = (Mat_SeqBAIJ *)B->data;
3083d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
30849566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3085d0609cedSBarry Smith   PetscOptionsEnd();
30868c07d4e3SBarry Smith 
3087a23d5eceSKris Buschelman   if (!flg) {
3088a23d5eceSKris Buschelman     switch (bs) {
3089a23d5eceSKris Buschelman     case 1:
3090a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
3091a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3092a23d5eceSKris Buschelman       break;
3093a23d5eceSKris Buschelman     case 2:
3094a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
3095a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3096a23d5eceSKris Buschelman       break;
3097a23d5eceSKris Buschelman     case 3:
3098a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
3099a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3100a23d5eceSKris Buschelman       break;
3101a23d5eceSKris Buschelman     case 4:
3102a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
3103a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3104a23d5eceSKris Buschelman       break;
3105a23d5eceSKris Buschelman     case 5:
3106a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
3107a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3108a23d5eceSKris Buschelman       break;
3109a23d5eceSKris Buschelman     case 6:
3110a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
3111a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3112a23d5eceSKris Buschelman       break;
3113a23d5eceSKris Buschelman     case 7:
3114a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
3115a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3116a23d5eceSKris Buschelman       break;
31179371c9d4SSatish Balay     case 9: {
31186679dcc1SBarry Smith       PetscInt version = 1;
31199566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
31206679dcc1SBarry Smith       switch (version) {
31215f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
31226679dcc1SBarry Smith       case 1:
312396e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
312496e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
31259566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
31266679dcc1SBarry Smith         break;
31276679dcc1SBarry Smith #endif
31286679dcc1SBarry Smith       default:
312996e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_N;
313096e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
31319566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
313296e086a2SDaniel Kokron         break;
31336679dcc1SBarry Smith       }
31346679dcc1SBarry Smith       break;
31356679dcc1SBarry Smith     }
3136ebada01fSBarry Smith     case 11:
3137ebada01fSBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_11;
3138ebada01fSBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3139ebada01fSBarry Smith       break;
31409371c9d4SSatish Balay     case 12: {
31416679dcc1SBarry Smith       PetscInt version = 1;
31429566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
31436679dcc1SBarry Smith       switch (version) {
31446679dcc1SBarry Smith       case 1:
31456679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
31466679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
31479566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
31488ab949d8SShri Abhyankar         break;
31496679dcc1SBarry Smith       case 2:
31506679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
31516679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
31529566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
31536679dcc1SBarry Smith         break;
31546679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
31556679dcc1SBarry Smith       case 3:
31566679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
31576679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
31589566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
31596679dcc1SBarry Smith         break;
31606679dcc1SBarry Smith #endif
3161a23d5eceSKris Buschelman       default:
3162a23d5eceSKris Buschelman         B->ops->mult    = MatMult_SeqBAIJ_N;
3163a23d5eceSKris Buschelman         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
31649566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
31656679dcc1SBarry Smith         break;
31666679dcc1SBarry Smith       }
31676679dcc1SBarry Smith       break;
31686679dcc1SBarry Smith     }
31699371c9d4SSatish Balay     case 15: {
31706679dcc1SBarry Smith       PetscInt version = 1;
31719566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
31726679dcc1SBarry Smith       switch (version) {
31736679dcc1SBarry Smith       case 1:
31746679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
31759566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
31766679dcc1SBarry Smith         break;
31776679dcc1SBarry Smith       case 2:
31786679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
31799566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
31806679dcc1SBarry Smith         break;
31816679dcc1SBarry Smith       case 3:
31826679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
31839566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
31846679dcc1SBarry Smith         break;
31856679dcc1SBarry Smith       case 4:
31866679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
31879566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
31886679dcc1SBarry Smith         break;
31896679dcc1SBarry Smith       default:
31906679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_N;
31919566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
31926679dcc1SBarry Smith         break;
31936679dcc1SBarry Smith       }
31946679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
31956679dcc1SBarry Smith       break;
31966679dcc1SBarry Smith     }
31976679dcc1SBarry Smith     default:
31986679dcc1SBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_N;
31996679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
32009566063dSJacob Faibussowitsch       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3201a23d5eceSKris Buschelman       break;
3202a23d5eceSKris Buschelman     }
3203a23d5eceSKris Buschelman   }
3204e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
3205a23d5eceSKris Buschelman   b->mbs      = mbs;
3206a23d5eceSKris Buschelman   b->nbs      = nbs;
3207ab93d7beSBarry Smith   if (!skipallocation) {
32082ee49352SLisandro Dalcin     if (!b->imax) {
32099566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
32109566063dSJacob Faibussowitsch       PetscCall(PetscLogObjectMemory((PetscObject)B, 2 * mbs * sizeof(PetscInt)));
321126fbe8dcSKarl Rupp 
32124fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
32132ee49352SLisandro Dalcin     }
3214ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
321526fbe8dcSKarl Rupp     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3216a23d5eceSKris Buschelman     if (!nnz) {
3217a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3218c62bd62aSJed Brown       else if (nz < 0) nz = 1;
32195d2a9ed1SStefano Zampini       nz = PetscMin(nz, nbs);
3220a23d5eceSKris Buschelman       for (i = 0; i < mbs; i++) b->imax[i] = nz;
32219566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, mbs, &nz));
3222a23d5eceSKris Buschelman     } else {
3223c73702f5SBarry Smith       PetscInt64 nz64 = 0;
32249371c9d4SSatish Balay       for (i = 0; i < mbs; i++) {
32259371c9d4SSatish Balay         b->imax[i] = nnz[i];
32269371c9d4SSatish Balay         nz64 += nnz[i];
32279371c9d4SSatish Balay       }
32289566063dSJacob Faibussowitsch       PetscCall(PetscIntCast(nz64, &nz));
3229a23d5eceSKris Buschelman     }
3230a23d5eceSKris Buschelman 
3231a23d5eceSKris Buschelman     /* allocate the matrix space */
32329566063dSJacob Faibussowitsch     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3233672ba085SHong Zhang     if (B->structure_only) {
32349566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz, &b->j));
32359566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i));
32369566063dSJacob Faibussowitsch       PetscCall(PetscLogObjectMemory((PetscObject)B, (B->rmap->N + 1) * sizeof(PetscInt) + nz * sizeof(PetscInt)));
3237672ba085SHong Zhang     } else {
32386679dcc1SBarry Smith       PetscInt nzbs2 = 0;
32399566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
32409566063dSJacob Faibussowitsch       PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i));
32419566063dSJacob Faibussowitsch       PetscCall(PetscLogObjectMemory((PetscObject)B, (B->rmap->N + 1) * sizeof(PetscInt) + nz * (bs2 * sizeof(PetscScalar) + sizeof(PetscInt))));
32429566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(b->a, nz * bs2));
3243672ba085SHong Zhang     }
32449566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(b->j, nz));
324526fbe8dcSKarl Rupp 
3246672ba085SHong Zhang     if (B->structure_only) {
3247672ba085SHong Zhang       b->singlemalloc = PETSC_FALSE;
3248672ba085SHong Zhang       b->free_a       = PETSC_FALSE;
3249672ba085SHong Zhang     } else {
3250a23d5eceSKris Buschelman       b->singlemalloc = PETSC_TRUE;
3251672ba085SHong Zhang       b->free_a       = PETSC_TRUE;
3252672ba085SHong Zhang     }
3253672ba085SHong Zhang     b->free_ij = PETSC_TRUE;
3254672ba085SHong Zhang 
3255a23d5eceSKris Buschelman     b->i[0] = 0;
32569371c9d4SSatish Balay     for (i = 1; i < mbs + 1; i++) { b->i[i] = b->i[i - 1] + b->imax[i - 1]; }
3257672ba085SHong Zhang 
3258e811da20SHong Zhang   } else {
3259e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3260e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3261ab93d7beSBarry Smith   }
3262a23d5eceSKris Buschelman 
3263a23d5eceSKris Buschelman   b->bs2              = bs2;
3264a23d5eceSKris Buschelman   b->mbs              = mbs;
3265a23d5eceSKris Buschelman   b->nz               = 0;
3266b32cb4a7SJed Brown   b->maxnz            = nz;
3267b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3268cb7b82ddSBarry Smith   B->was_assembled    = PETSC_FALSE;
3269cb7b82ddSBarry Smith   B->assembled        = PETSC_FALSE;
32709566063dSJacob Faibussowitsch   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3271a23d5eceSKris Buschelman   PetscFunctionReturn(0);
3272a23d5eceSKris Buschelman }
3273a23d5eceSKris Buschelman 
32749371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) {
3275725b52f3SLisandro Dalcin   PetscInt     i, m, nz, nz_max = 0, *nnz;
3276f4259b30SLisandro Dalcin   PetscScalar *values      = NULL;
3277d47bf9aaSJed Brown   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3278725b52f3SLisandro Dalcin 
3279725b52f3SLisandro Dalcin   PetscFunctionBegin;
32805f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
32819566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
32829566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
32839566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
32849566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
32859566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3286d0f46423SBarry Smith   m = B->rmap->n / bs;
3287725b52f3SLisandro Dalcin 
32885f80ce2aSJacob Faibussowitsch   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
32899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &nnz));
3290725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3291cf12db73SBarry Smith     nz = ii[i + 1] - ii[i];
32925f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3293725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3294725b52f3SLisandro Dalcin     nnz[i] = nz;
3295725b52f3SLisandro Dalcin   }
32969566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
32979566063dSJacob Faibussowitsch   PetscCall(PetscFree(nnz));
3298725b52f3SLisandro Dalcin 
3299725b52f3SLisandro Dalcin   values = (PetscScalar *)V;
3300*48a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3301725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3302cf12db73SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
3303cf12db73SBarry Smith     const PetscInt *icols = jj + ii[i];
3304bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {
3305cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
33069566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
33073adadaf3SJed Brown     } else {
33083adadaf3SJed Brown       PetscInt j;
33093adadaf3SJed Brown       for (j = 0; j < ncols; j++) {
33103adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
33119566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
33123adadaf3SJed Brown       }
33133adadaf3SJed Brown     }
3314725b52f3SLisandro Dalcin   }
33159566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
33169566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
33179566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
33189566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3319725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3320725b52f3SLisandro Dalcin }
3321725b52f3SLisandro Dalcin 
3322cda14afcSprj- /*@C
3323cda14afcSprj-    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3324cda14afcSprj- 
3325cda14afcSprj-    Not Collective
3326cda14afcSprj- 
3327cda14afcSprj-    Input Parameter:
3328cda14afcSprj- .  A - a MATSEQBAIJ matrix
3329cda14afcSprj- 
3330cda14afcSprj-    Output Parameter:
3331cda14afcSprj- .   array - pointer to the data
3332cda14afcSprj- 
3333cda14afcSprj-    Level: intermediate
3334cda14afcSprj- 
3335db781477SPatrick Sanan .seealso: `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3336cda14afcSprj- @*/
33379371c9d4SSatish Balay PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) {
3338cda14afcSprj-   PetscFunctionBegin;
  /* Dispatch by name: MatCreate_SeqBAIJ() composes MatSeqBAIJGetArray_SeqBAIJ on the matrix under
     "MatSeqBAIJGetArray_C", and that routine is invoked here as (A, array).
     NOTE(review): PetscUseMethod presumably raises an error when nothing is composed under this name - confirm. */
3339cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3340cda14afcSprj-   PetscFunctionReturn(0);
3341cda14afcSprj- }
3342cda14afcSprj- 
3343cda14afcSprj- /*@C
3344cda14afcSprj-    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3345cda14afcSprj- 
3346cda14afcSprj-    Not Collective
3347cda14afcSprj- 
3348cda14afcSprj-    Input Parameters:
3349cda14afcSprj- +  A - a MATSEQBAIJ matrix
3350cda14afcSprj- -  array - pointer to the data
3351cda14afcSprj- 
3352cda14afcSprj-    Level: intermediate
3353cda14afcSprj- 
3354db781477SPatrick Sanan .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3355cda14afcSprj- @*/
33569371c9d4SSatish Balay PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) {
3357cda14afcSprj-   PetscFunctionBegin;
  /* Dispatch by name: MatCreate_SeqBAIJ() composes MatSeqBAIJRestoreArray_SeqBAIJ on the matrix under
     "MatSeqBAIJRestoreArray_C", and that routine is invoked here as (A, array).
     NOTE(review): PetscUseMethod presumably raises an error when nothing is composed under this name - confirm. */
3358cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3359cda14afcSprj-   PetscFunctionReturn(0);
3360cda14afcSprj- }
3361cda14afcSprj- 
33620bad9183SKris Buschelman /*MC
3363fafad747SKris Buschelman    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
33640bad9183SKris Buschelman    block sparse compressed row format.
33650bad9183SKris Buschelman 
33660bad9183SKris Buschelman    Options Database Keys:
33676679dcc1SBarry Smith + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
33686679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
33690bad9183SKris Buschelman 
33700bad9183SKris Buschelman    Level: beginner
33710cd7f59aSBarry Smith 
33720cd7f59aSBarry Smith    Notes:
33730cd7f59aSBarry Smith     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
33740cd7f59aSBarry Smith     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
33750bad9183SKris Buschelman 
33766679dcc1SBarry Smith    Run with -info to see what version of the matrix-vector product is being used
33776679dcc1SBarry Smith 
3378db781477SPatrick Sanan .seealso: `MatCreateSeqBAIJ()`
33790bad9183SKris Buschelman M*/
33800bad9183SKris Buschelman 
3381cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3382b24902e0SBarry Smith 
33839371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) {
3384c1ac3661SBarry Smith   PetscMPIInt  size;
3385b6490206SBarry Smith   Mat_SeqBAIJ *b;
33863b2fbd54SBarry Smith 
33873a40ed3dSBarry Smith   PetscFunctionBegin;
33889566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
33895f80ce2aSJacob Faibussowitsch   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3390b6490206SBarry Smith 
33919566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(B, &b));
3392b0a32e0cSBarry Smith   B->data = (void *)b;
33939566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
339426fbe8dcSKarl Rupp 
3395f4259b30SLisandro Dalcin   b->row          = NULL;
3396f4259b30SLisandro Dalcin   b->col          = NULL;
3397f4259b30SLisandro Dalcin   b->icol         = NULL;
33982593348eSBarry Smith   b->reallocs     = 0;
3399f4259b30SLisandro Dalcin   b->saved_values = NULL;
34002593348eSBarry Smith 
3401c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
34022593348eSBarry Smith   b->nonew              = 0;
3403f4259b30SLisandro Dalcin   b->diag               = NULL;
3404f4259b30SLisandro Dalcin   B->spptr              = NULL;
3405b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3406a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
34074e220ebcSLois Curfman McInnes 
34089566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
34099566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
34109566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
34119566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
34129566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
34139566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
34149566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
34159566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
34169566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
34179566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
34187ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
34199566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
34207ea3e4caSstefano_zampini #endif
34219566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
34229566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
34233a40ed3dSBarry Smith   PetscFunctionReturn(0);
34242593348eSBarry Smith }
34252593348eSBarry Smith 
34269371c9d4SSatish Balay PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) {
3427b24902e0SBarry Smith   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3428a96a251dSBarry Smith   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3429de6a44a3SBarry Smith 
34303a40ed3dSBarry Smith   PetscFunctionBegin;
34315f80ce2aSJacob Faibussowitsch   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
34322593348eSBarry Smith 
34334fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
34344fd072dbSBarry Smith     c->imax           = a->imax;
34354fd072dbSBarry Smith     c->ilen           = a->ilen;
34364fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
34374fd072dbSBarry Smith   } else {
34389566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
34399566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)C, 2 * mbs * sizeof(PetscInt)));
3440b6490206SBarry Smith     for (i = 0; i < mbs; i++) {
34412593348eSBarry Smith       c->imax[i] = a->imax[i];
34422593348eSBarry Smith       c->ilen[i] = a->ilen[i];
34432593348eSBarry Smith     }
34444fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
34454fd072dbSBarry Smith   }
34462593348eSBarry Smith 
34472593348eSBarry Smith   /* allocate the matrix space */
344816a2bf60SHong Zhang   if (mallocmatspace) {
34494fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
34509566063dSJacob Faibussowitsch       PetscCall(PetscCalloc1(bs2 * nz, &c->a));
34519566063dSJacob Faibussowitsch       PetscCall(PetscLogObjectMemory((PetscObject)C, a->i[mbs] * bs2 * sizeof(PetscScalar)));
345226fbe8dcSKarl Rupp 
34534fd072dbSBarry Smith       c->i            = a->i;
34544fd072dbSBarry Smith       c->j            = a->j;
3455379be0ddSLisandro Dalcin       c->singlemalloc = PETSC_FALSE;
3456379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
3457379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
34584fd072dbSBarry Smith       c->parent       = A;
34591e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
34601e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
346126fbe8dcSKarl Rupp 
34629566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)A));
34639566063dSJacob Faibussowitsch       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
34649566063dSJacob Faibussowitsch       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
34654fd072dbSBarry Smith     } else {
34669566063dSJacob Faibussowitsch       PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));
34679566063dSJacob Faibussowitsch       PetscCall(PetscLogObjectMemory((PetscObject)C, a->i[mbs] * (bs2 * sizeof(PetscScalar) + sizeof(PetscInt)) + (mbs + 1) * sizeof(PetscInt)));
346826fbe8dcSKarl Rupp 
3469c4992f7dSBarry Smith       c->singlemalloc = PETSC_TRUE;
3470379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
34714fd072dbSBarry Smith       c->free_ij      = PETSC_TRUE;
347226fbe8dcSKarl Rupp 
34739566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3474b6490206SBarry Smith       if (mbs > 0) {
34759566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(c->j, a->j, nz));
34762e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
34779566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
34782e8a6d31SBarry Smith         } else {
34799566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(c->a, bs2 * nz));
34802593348eSBarry Smith         }
34812593348eSBarry Smith       }
34821e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
34831e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
348416a2bf60SHong Zhang     }
34854fd072dbSBarry Smith   }
348616a2bf60SHong Zhang 
34872593348eSBarry Smith   c->roworiented = a->roworiented;
34882593348eSBarry Smith   c->nonew       = a->nonew;
348926fbe8dcSKarl Rupp 
34909566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
34919566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
349226fbe8dcSKarl Rupp 
34935c9eb25fSBarry Smith   c->bs2 = a->bs2;
34945c9eb25fSBarry Smith   c->mbs = a->mbs;
34955c9eb25fSBarry Smith   c->nbs = a->nbs;
34962593348eSBarry Smith 
34972593348eSBarry Smith   if (a->diag) {
34984fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
34994fd072dbSBarry Smith       c->diag      = a->diag;
35004fd072dbSBarry Smith       c->free_diag = PETSC_FALSE;
35014fd072dbSBarry Smith     } else {
35029566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
35039566063dSJacob Faibussowitsch       PetscCall(PetscLogObjectMemory((PetscObject)C, (mbs + 1) * sizeof(PetscInt)));
350426fbe8dcSKarl Rupp       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
35054fd072dbSBarry Smith       c->free_diag = PETSC_TRUE;
35064fd072dbSBarry Smith     }
3507f4259b30SLisandro Dalcin   } else c->diag = NULL;
350826fbe8dcSKarl Rupp 
35092593348eSBarry Smith   c->nz         = a->nz;
3510f2cbd3d5SJed Brown   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3511f361c04dSBarry Smith   c->solve_work = NULL;
3512f361c04dSBarry Smith   c->mult_work  = NULL;
3513f361c04dSBarry Smith   c->sor_workt  = NULL;
3514f361c04dSBarry Smith   c->sor_work   = NULL;
351588e51ccdSHong Zhang 
351688e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
351788e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3518cd6b891eSBarry Smith   if (a->compressedrow.use) {
351988e51ccdSHong Zhang     i = a->compressedrow.nrows;
35209566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
35219566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)C, (2 * i + 1) * sizeof(PetscInt)));
35229566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
35239566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
352488e51ccdSHong Zhang   } else {
352588e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
35260298fd71SBarry Smith     c->compressedrow.i      = NULL;
35270298fd71SBarry Smith     c->compressedrow.rindex = NULL;
352888e51ccdSHong Zhang   }
3529e56f5c9eSBarry Smith   C->nonzerostate = A->nonzerostate;
353026fbe8dcSKarl Rupp 
35319566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
35323a40ed3dSBarry Smith   PetscFunctionReturn(0);
35332593348eSBarry Smith }
35342593348eSBarry Smith 
35359371c9d4SSatish Balay PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) {
3536b24902e0SBarry Smith   PetscFunctionBegin;
35379566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
35389566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
35399566063dSJacob Faibussowitsch   PetscCall(MatSetType(*B, MATSEQBAIJ));
35409566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3541b24902e0SBarry Smith   PetscFunctionReturn(0);
3542b24902e0SBarry Smith }
3543b24902e0SBarry Smith 
3544618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
35459371c9d4SSatish Balay PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) {
3546b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3547b51a4376SLisandro Dalcin   PetscInt    *rowidxs, *colidxs;
3548b51a4376SLisandro Dalcin   PetscScalar *matvals;
3549b51a4376SLisandro Dalcin 
3550b51a4376SLisandro Dalcin   PetscFunctionBegin;
35519566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
3552b51a4376SLisandro Dalcin 
3553b51a4376SLisandro Dalcin   /* read matrix header */
35549566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
35555f80ce2aSJacob Faibussowitsch   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
35569371c9d4SSatish Balay   M  = header[1];
35579371c9d4SSatish Balay   N  = header[2];
35589371c9d4SSatish Balay   nz = header[3];
35595f80ce2aSJacob Faibussowitsch   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
35605f80ce2aSJacob Faibussowitsch   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
35615f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3562b51a4376SLisandro Dalcin 
3563b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
35649566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3565b51a4376SLisandro Dalcin   /* set local and global sizes if not set already */
3566b51a4376SLisandro Dalcin   if (mat->rmap->n < 0) mat->rmap->n = M;
3567b51a4376SLisandro Dalcin   if (mat->cmap->n < 0) mat->cmap->n = N;
3568b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3569b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
35709566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
35719566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
3572b51a4376SLisandro Dalcin 
3573b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
35749566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &rows, &cols));
35755f80ce2aSJacob Faibussowitsch   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
35769566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
35779566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
35789371c9d4SSatish Balay   mbs = m / bs;
35799371c9d4SSatish Balay   nbs = n / bs;
3580b51a4376SLisandro Dalcin 
3581b51a4376SLisandro Dalcin   /* read in row lengths, column indices and nonzero values */
35829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &rowidxs));
35839566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
35849371c9d4SSatish Balay   rowidxs[0] = 0;
35859371c9d4SSatish Balay   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3586b51a4376SLisandro Dalcin   sum = rowidxs[m];
35875f80ce2aSJacob Faibussowitsch   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3588b51a4376SLisandro Dalcin 
3589b51a4376SLisandro Dalcin   /* read in column indices and nonzero values */
35909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
35919566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
35929566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3593b51a4376SLisandro Dalcin 
3594b51a4376SLisandro Dalcin   {               /* preallocate matrix storage */
3595b51a4376SLisandro Dalcin     PetscBT   bt; /* helper bit set to count nonzeros */
3596b51a4376SLisandro Dalcin     PetscInt *nnz;
3597618cc2edSLisandro Dalcin     PetscBool sbaij;
3598b51a4376SLisandro Dalcin 
35999566063dSJacob Faibussowitsch     PetscCall(PetscBTCreate(nbs, &bt));
36009566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(mbs, &nnz));
36019566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3602b51a4376SLisandro Dalcin     for (i = 0; i < mbs; i++) {
36039566063dSJacob Faibussowitsch       PetscCall(PetscBTMemzero(nbs, bt));
3604618cc2edSLisandro Dalcin       for (k = 0; k < bs; k++) {
3605618cc2edSLisandro Dalcin         PetscInt row = bs * i + k;
3606618cc2edSLisandro Dalcin         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3607618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3608618cc2edSLisandro Dalcin           if (!sbaij || col >= row)
3609618cc2edSLisandro Dalcin             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3610618cc2edSLisandro Dalcin         }
3611618cc2edSLisandro Dalcin       }
3612b51a4376SLisandro Dalcin     }
36139566063dSJacob Faibussowitsch     PetscCall(PetscBTDestroy(&bt));
36149566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
36159566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
36169566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
3617b51a4376SLisandro Dalcin   }
3618b51a4376SLisandro Dalcin 
3619b51a4376SLisandro Dalcin   /* store matrix values */
3620b51a4376SLisandro Dalcin   for (i = 0; i < m; i++) {
3621b51a4376SLisandro Dalcin     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
36229566063dSJacob Faibussowitsch     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3623b51a4376SLisandro Dalcin   }
3624b51a4376SLisandro Dalcin 
36259566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
36269566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs, matvals));
36279566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
36289566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3629b51a4376SLisandro Dalcin   PetscFunctionReturn(0);
3630b51a4376SLisandro Dalcin }
3631b51a4376SLisandro Dalcin 
36329371c9d4SSatish Balay PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) {
36337f489da9SVaclav Hapla   PetscBool isbinary;
3634f501eaabSShri Abhyankar 
3635f501eaabSShri Abhyankar   PetscFunctionBegin;
36369566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
36375f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
36389566063dSJacob Faibussowitsch   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3639f501eaabSShri Abhyankar   PetscFunctionReturn(0);
3640f501eaabSShri Abhyankar }
3641f501eaabSShri Abhyankar 
3642273d9f13SBarry Smith /*@C
3643273d9f13SBarry Smith    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3644273d9f13SBarry Smith    compressed row) format.  For good matrix assembly performance the
3645273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3646273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3647273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
36482593348eSBarry Smith 
3649d083f849SBarry Smith    Collective
3650273d9f13SBarry Smith 
3651273d9f13SBarry Smith    Input Parameters:
3652273d9f13SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
3653bb7ae925SBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3654bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3655273d9f13SBarry Smith .  m - number of rows
3656273d9f13SBarry Smith .  n - number of columns
365735d8aa7fSBarry Smith .  nz - number of nonzero blocks  per block row (same for all rows)
365835d8aa7fSBarry Smith -  nnz - array containing the number of nonzero blocks in the various block rows
36590298fd71SBarry Smith          (possibly different for each block row) or NULL
3660273d9f13SBarry Smith 
3661273d9f13SBarry Smith    Output Parameter:
3662273d9f13SBarry Smith .  A - the matrix
3663273d9f13SBarry Smith 
3664175b88e8SBarry Smith    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3665f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3666175b88e8SBarry Smith    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3667175b88e8SBarry Smith 
3668273d9f13SBarry Smith    Options Database Keys:
3669a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
3670273d9f13SBarry Smith                      block calculations (much slower)
3671a2b725a8SWilliam Gropp -    -mat_block_size - size of the blocks to use
3672273d9f13SBarry Smith 
3673273d9f13SBarry Smith    Level: intermediate
3674273d9f13SBarry Smith 
3675273d9f13SBarry Smith    Notes:
3676d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
3677d1be2dadSMatthew Knepley 
367849a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
367949a6f317SBarry Smith 
368035d8aa7fSBarry Smith    A nonzero block is any block that has 1 or more nonzeros in it
368135d8aa7fSBarry Smith 
3682273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
3683273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3684273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3685273d9f13SBarry Smith 
3686273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
36870298fd71SBarry Smith    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
   allocation.  See Users-Manual: ch_mat for details.
3690273d9f13SBarry Smith 
3691db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3692273d9f13SBarry Smith @*/
36939371c9d4SSatish Balay PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) {
3694273d9f13SBarry Smith   PetscFunctionBegin;
36959566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
36969566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
36979566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJ));
36989566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3699273d9f13SBarry Smith   PetscFunctionReturn(0);
3700273d9f13SBarry Smith }
3701273d9f13SBarry Smith 
3702273d9f13SBarry Smith /*@C
3703273d9f13SBarry Smith    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3704273d9f13SBarry Smith    per row in the matrix. For good matrix assembly performance the
3705273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3706273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3707273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
3708273d9f13SBarry Smith 
3709d083f849SBarry Smith    Collective
3710273d9f13SBarry Smith 
3711273d9f13SBarry Smith    Input Parameters:
37121c4f3114SJed Brown +  B - the matrix
3713bb7ae925SBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3714bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3715273d9f13SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
3716273d9f13SBarry Smith -  nnz - array containing the number of block nonzeros in the various block rows
37170298fd71SBarry Smith          (possibly different for each block row) or NULL
3718273d9f13SBarry Smith 
3719273d9f13SBarry Smith    Options Database Keys:
3720a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
3721273d9f13SBarry Smith                      block calculations (much slower)
3722a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
3723273d9f13SBarry Smith 
3724273d9f13SBarry Smith    Level: intermediate
3725273d9f13SBarry Smith 
3726273d9f13SBarry Smith    Notes:
372749a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
372849a6f317SBarry Smith 
3729aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
3730aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3731aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3732aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3733aa95bbe8SBarry Smith 
3734273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
3735273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3736273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3737273d9f13SBarry Smith 
3738273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
37390298fd71SBarry Smith    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3740a7f22e61SSatish Balay    allocation.  See Users-Manual: ch_mat for details.
3741273d9f13SBarry Smith 
3742db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3743273d9f13SBarry Smith @*/
37449371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) {
3745273d9f13SBarry Smith   PetscFunctionBegin;
37466ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
37476ba663aaSJed Brown   PetscValidType(B, 1);
37486ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3749cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3750273d9f13SBarry Smith   PetscFunctionReturn(0);
3751273d9f13SBarry Smith }
3752a1d92eedSBarry Smith 
3753725b52f3SLisandro Dalcin /*@C
3754664954b6SBarry Smith    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3755725b52f3SLisandro Dalcin 
3756d083f849SBarry Smith    Collective
3757725b52f3SLisandro Dalcin 
3758725b52f3SLisandro Dalcin    Input Parameters:
37591c4f3114SJed Brown +  B - the matrix
3760725b52f3SLisandro Dalcin .  i - the indices into j for the start of each local row (starts with zero)
3761725b52f3SLisandro Dalcin .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3762725b52f3SLisandro Dalcin -  v - optional values in the matrix
3763725b52f3SLisandro Dalcin 
3764664954b6SBarry Smith    Level: advanced
3765725b52f3SLisandro Dalcin 
37663adadaf3SJed Brown    Notes:
37673adadaf3SJed Brown    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
37683adadaf3SJed Brown    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
37693adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
37703adadaf3SJed Brown    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
37713adadaf3SJed Brown    block column and the second index is over columns within a block.
37723adadaf3SJed Brown 
3773664954b6SBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3774664954b6SBarry Smith 
3775db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3776725b52f3SLisandro Dalcin @*/
37779371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
3778725b52f3SLisandro Dalcin   PetscFunctionBegin;
37796ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
37806ba663aaSJed Brown   PetscValidType(B, 1);
37816ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3782cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3783725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3784725b52f3SLisandro Dalcin }
3785725b52f3SLisandro Dalcin 
3786c75a6043SHong Zhang /*@
3787dfb205c3SBarry Smith      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3788c75a6043SHong Zhang 
3789d083f849SBarry Smith      Collective
3790c75a6043SHong Zhang 
3791c75a6043SHong Zhang    Input Parameters:
3792c75a6043SHong Zhang +  comm - must be an MPI communicator of size 1
3793c75a6043SHong Zhang .  bs - size of block
3794c75a6043SHong Zhang .  m - number of rows
3795c75a6043SHong Zhang .  n - number of columns
3796483a2f95SBarry Smith .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3797c75a6043SHong Zhang .  j - column indices
3798c75a6043SHong Zhang -  a - matrix values
3799c75a6043SHong Zhang 
3800c75a6043SHong Zhang    Output Parameter:
3801c75a6043SHong Zhang .  mat - the matrix
3802c75a6043SHong Zhang 
3803dfb205c3SBarry Smith    Level: advanced
3804c75a6043SHong Zhang 
3805c75a6043SHong Zhang    Notes:
3806c75a6043SHong Zhang        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3807c75a6043SHong Zhang     once the matrix is destroyed
3808c75a6043SHong Zhang 
3809c75a6043SHong Zhang        You cannot set new nonzero locations into this matrix, that will generate an error.
3810c75a6043SHong Zhang 
3811c75a6043SHong Zhang        The i and j indices are 0 based
3812c75a6043SHong Zhang 
3813dfb205c3SBarry Smith        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3814dfb205c3SBarry Smith 
38153adadaf3SJed Brown       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
38163adadaf3SJed Brown       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
38173adadaf3SJed Brown       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
38183adadaf3SJed Brown       with column-major ordering within blocks.
3819dfb205c3SBarry Smith 
3820db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3821c75a6043SHong Zhang 
3822c75a6043SHong Zhang @*/
38239371c9d4SSatish Balay PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) {
3824c75a6043SHong Zhang   PetscInt     ii;
3825c75a6043SHong Zhang   Mat_SeqBAIJ *baij;
3826c75a6043SHong Zhang 
3827c75a6043SHong Zhang   PetscFunctionBegin;
38285f80ce2aSJacob Faibussowitsch   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
38295f80ce2aSJacob Faibussowitsch   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3830c75a6043SHong Zhang 
38319566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
38329566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, m, n));
38339566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATSEQBAIJ));
38349566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3835c75a6043SHong Zhang   baij = (Mat_SeqBAIJ *)(*mat)->data;
38369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
38379566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)*mat, 2 * m * sizeof(PetscInt)));
3838c75a6043SHong Zhang 
3839c75a6043SHong Zhang   baij->i = i;
3840c75a6043SHong Zhang   baij->j = j;
3841c75a6043SHong Zhang   baij->a = a;
384226fbe8dcSKarl Rupp 
3843c75a6043SHong Zhang   baij->singlemalloc = PETSC_FALSE;
3844c75a6043SHong Zhang   baij->nonew        = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3845e6b907acSBarry Smith   baij->free_a       = PETSC_FALSE;
3846e6b907acSBarry Smith   baij->free_ij      = PETSC_FALSE;
3847c75a6043SHong Zhang 
3848c75a6043SHong Zhang   for (ii = 0; ii < m; ii++) {
3849c75a6043SHong Zhang     baij->ilen[ii] = baij->imax[ii] = i[ii + 1] - i[ii];
38506bdcaf15SBarry Smith     PetscCheck(i[ii + 1] - i[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, i[ii + 1] - i[ii]);
3851c75a6043SHong Zhang   }
385276bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
3853c75a6043SHong Zhang     for (ii = 0; ii < baij->i[m]; ii++) {
38546bdcaf15SBarry Smith       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
38556bdcaf15SBarry Smith       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3856c75a6043SHong Zhang     }
385776bd3646SJed Brown   }
3858c75a6043SHong Zhang 
38599566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
38609566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3861c75a6043SHong Zhang   PetscFunctionReturn(0);
3862c75a6043SHong Zhang }
3863bdf6f3fcSHong Zhang 
38649371c9d4SSatish Balay PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
3865bdf6f3fcSHong Zhang   PetscFunctionBegin;
38669566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3867bdf6f3fcSHong Zhang   PetscFunctionReturn(0);
3868bdf6f3fcSHong Zhang }
3869