xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4dfa11a44d5adf2389f1d3acbc8f3c1116dc6c3a)
1be1d678aSKris Buschelman 
22593348eSBarry Smith /*
    Defines the basic matrix operations for the BAIJ (block compressed row)
  matrix storage format.
52593348eSBarry Smith */
6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
7c6db04a5SJed Brown #include <petscblaslapack.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
1043516a2dSKris Buschelman 
117ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
127ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
137ea3e4caSstefano_zampini #endif
147ea3e4caSstefano_zampini 
15b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
17b5b72c8aSIrina Sokolova #endif
18c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
19b5b72c8aSIrina Sokolova 
209371c9d4SSatish Balay PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) {
219463ebdaSPierre Jolivet   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
22857cbf51SRichard Tran Mills   PetscInt     m, n, i;
239463ebdaSPierre Jolivet   PetscInt     ib, jb, bs = A->rmap->bs;
249463ebdaSPierre Jolivet   MatScalar   *a_val = a_aij->a;
259463ebdaSPierre Jolivet 
269463ebdaSPierre Jolivet   PetscFunctionBegin;
279566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
28857cbf51SRichard Tran Mills   for (i = 0; i < n; i++) reductions[i] = 0.0;
299463ebdaSPierre Jolivet   if (type == NORM_2) {
309463ebdaSPierre Jolivet     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
319463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
329463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
33857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
349463ebdaSPierre Jolivet           a_val++;
359463ebdaSPierre Jolivet         }
369463ebdaSPierre Jolivet       }
379463ebdaSPierre Jolivet     }
389463ebdaSPierre Jolivet   } else if (type == NORM_1) {
399463ebdaSPierre Jolivet     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
409463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
419463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
42857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
439463ebdaSPierre Jolivet           a_val++;
449463ebdaSPierre Jolivet         }
459463ebdaSPierre Jolivet       }
469463ebdaSPierre Jolivet     }
479463ebdaSPierre Jolivet   } else if (type == NORM_INFINITY) {
489463ebdaSPierre Jolivet     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
499463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
509463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
519463ebdaSPierre Jolivet           int col         = A->cmap->rstart + a_aij->j[i] * bs + jb;
52857cbf51SRichard Tran Mills           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
539463ebdaSPierre Jolivet           a_val++;
549463ebdaSPierre Jolivet         }
559463ebdaSPierre Jolivet       }
569463ebdaSPierre Jolivet     }
57857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
58857cbf51SRichard Tran Mills     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
59857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
60857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
61857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
62857cbf51SRichard Tran Mills           a_val++;
63857cbf51SRichard Tran Mills         }
64857cbf51SRichard Tran Mills       }
65857cbf51SRichard Tran Mills     }
66857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
67857cbf51SRichard Tran Mills     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
68857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
69857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
70857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
71857cbf51SRichard Tran Mills           a_val++;
72857cbf51SRichard Tran Mills         }
73857cbf51SRichard Tran Mills       }
74857cbf51SRichard Tran Mills     }
75857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
769463ebdaSPierre Jolivet   if (type == NORM_2) {
77857cbf51SRichard Tran Mills     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
78857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79857cbf51SRichard Tran Mills     for (i = 0; i < n; i++) reductions[i] /= m;
809463ebdaSPierre Jolivet   }
819463ebdaSPierre Jolivet   PetscFunctionReturn(0);
829463ebdaSPierre Jolivet }
839463ebdaSPierre Jolivet 
849371c9d4SSatish Balay PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) {
85b01c7715SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
86de80f912SBarry Smith   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
877f0c90edSBarry Smith   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
8862bba022SBarry Smith   PetscReal    shift = 0.0;
891a9391e3SHong Zhang   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
90b01c7715SBarry Smith 
91b01c7715SBarry Smith   PetscFunctionBegin;
92a455e926SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
93a455e926SHong Zhang 
949797317bSBarry Smith   if (a->idiagvalid) {
959797317bSBarry Smith     if (values) *values = a->idiag;
969797317bSBarry Smith     PetscFunctionReturn(0);
979797317bSBarry Smith   }
989566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
99b01c7715SBarry Smith   diag_offset = a->diag;
100*4dfa11a4SJacob Faibussowitsch   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
101b01c7715SBarry Smith   diag = a->idiag;
102bbead8a2SBarry Smith   if (values) *values = a->idiag;
103b01c7715SBarry Smith   /* factor and invert each block */
104521d7252SBarry Smith   switch (bs) {
105ab040260SJed Brown   case 1:
106ab040260SJed Brown     for (i = 0; i < mbs; i++) {
107ab040260SJed Brown       odiag   = v + 1 * diag_offset[i];
108ab040260SJed Brown       diag[0] = odiag[0];
109ec1892c8SHong Zhang 
110ec1892c8SHong Zhang       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
111ec1892c8SHong Zhang         if (allowzeropivot) {
1127b6c816cSBarry Smith           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1137b6c816cSBarry Smith           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
1147b6c816cSBarry Smith           A->factorerror_zeropivot_row   = i;
1159566063dSJacob Faibussowitsch           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
11698921bdaSJacob Faibussowitsch         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
117ec1892c8SHong Zhang       }
118ec1892c8SHong Zhang 
119d4a378daSJed Brown       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
120ab040260SJed Brown       diag += 1;
121ab040260SJed Brown     }
122ab040260SJed Brown     break;
123b01c7715SBarry Smith   case 2:
124b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
125b01c7715SBarry Smith       odiag   = v + 4 * diag_offset[i];
1269371c9d4SSatish Balay       diag[0] = odiag[0];
1279371c9d4SSatish Balay       diag[1] = odiag[1];
1289371c9d4SSatish Balay       diag[2] = odiag[2];
1299371c9d4SSatish Balay       diag[3] = odiag[3];
1309566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1317b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
132b01c7715SBarry Smith       diag += 4;
133b01c7715SBarry Smith     }
134b01c7715SBarry Smith     break;
135b01c7715SBarry Smith   case 3:
136b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
137b01c7715SBarry Smith       odiag   = v + 9 * diag_offset[i];
1389371c9d4SSatish Balay       diag[0] = odiag[0];
1399371c9d4SSatish Balay       diag[1] = odiag[1];
1409371c9d4SSatish Balay       diag[2] = odiag[2];
1419371c9d4SSatish Balay       diag[3] = odiag[3];
1429371c9d4SSatish Balay       diag[4] = odiag[4];
1439371c9d4SSatish Balay       diag[5] = odiag[5];
1449371c9d4SSatish Balay       diag[6] = odiag[6];
1459371c9d4SSatish Balay       diag[7] = odiag[7];
146b01c7715SBarry Smith       diag[8] = odiag[8];
1479566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
1487b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
149b01c7715SBarry Smith       diag += 9;
150b01c7715SBarry Smith     }
151b01c7715SBarry Smith     break;
152b01c7715SBarry Smith   case 4:
153b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
154b01c7715SBarry Smith       odiag = v + 16 * diag_offset[i];
1559566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 16));
1569566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
1577b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
158b01c7715SBarry Smith       diag += 16;
159b01c7715SBarry Smith     }
160b01c7715SBarry Smith     break;
161b01c7715SBarry Smith   case 5:
162b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
163b01c7715SBarry Smith       odiag = v + 25 * diag_offset[i];
1649566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 25));
1659566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
1667b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
167b01c7715SBarry Smith       diag += 25;
168b01c7715SBarry Smith     }
169b01c7715SBarry Smith     break;
170d49b2adcSBarry Smith   case 6:
171d49b2adcSBarry Smith     for (i = 0; i < mbs; i++) {
172d49b2adcSBarry Smith       odiag = v + 36 * diag_offset[i];
1739566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 36));
1749566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
1757b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
176d49b2adcSBarry Smith       diag += 36;
177d49b2adcSBarry Smith     }
178d49b2adcSBarry Smith     break;
179de80f912SBarry Smith   case 7:
180de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
181de80f912SBarry Smith       odiag = v + 49 * diag_offset[i];
1829566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 49));
1839566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
1847b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
185de80f912SBarry Smith       diag += 49;
186de80f912SBarry Smith     }
187de80f912SBarry Smith     break;
188b01c7715SBarry Smith   default:
1899566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
190de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
191de80f912SBarry Smith       odiag = v + bs2 * diag_offset[i];
1929566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, bs2));
1939566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
1947b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
195de80f912SBarry Smith       diag += bs2;
196de80f912SBarry Smith     }
1979566063dSJacob Faibussowitsch     PetscCall(PetscFree2(v_work, v_pivots));
198b01c7715SBarry Smith   }
199b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
200b01c7715SBarry Smith   PetscFunctionReturn(0);
201b01c7715SBarry Smith }
202b01c7715SBarry Smith 
2039371c9d4SSatish Balay PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
2046d3beeddSMatthew Knepley   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
205e48d15efSToby Isaac   PetscScalar       *x, *work, *w, *workt, *t;
206e48d15efSToby Isaac   const MatScalar   *v, *aa = a->a, *idiag;
207e48d15efSToby Isaac   const PetscScalar *b, *xb;
2085455b99fSToby Isaac   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
209e48d15efSToby Isaac   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
210c1ac3661SBarry Smith   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
211b01c7715SBarry Smith 
212b01c7715SBarry Smith   PetscFunctionBegin;
213b01c7715SBarry Smith   its = its * lits;
2145f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
2155f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2165f80ce2aSJacob Faibussowitsch   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
2175f80ce2aSJacob Faibussowitsch   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
2185f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
219b01c7715SBarry Smith 
2209566063dSJacob Faibussowitsch   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
221b01c7715SBarry Smith 
222b2ec919aSToby Isaac   if (!m) PetscFunctionReturn(0);
223b01c7715SBarry Smith   diag  = a->diag;
224b01c7715SBarry Smith   idiag = a->idiag;
225de80f912SBarry Smith   k     = PetscMax(A->rmap->n, A->cmap->n);
22648a46eb9SPierre Jolivet   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
22748a46eb9SPierre Jolivet   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
22848a46eb9SPierre Jolivet   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
2293475c22fSBarry Smith   work = a->mult_work;
2303475c22fSBarry Smith   t    = a->sor_workt;
231de80f912SBarry Smith   w    = a->sor_work;
232de80f912SBarry Smith 
2339566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
2349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
235de80f912SBarry Smith 
236de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
237de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
238e48d15efSToby Isaac       switch (bs) {
239e48d15efSToby Isaac       case 1:
240e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
241e48d15efSToby Isaac         t[0] = b[0];
242e48d15efSToby Isaac         i2   = 1;
243e48d15efSToby Isaac         idiag += 1;
244e48d15efSToby Isaac         for (i = 1; i < m; i++) {
245e48d15efSToby Isaac           v    = aa + ai[i];
246e48d15efSToby Isaac           vi   = aj + ai[i];
247e48d15efSToby Isaac           nz   = diag[i] - ai[i];
248e48d15efSToby Isaac           s[0] = b[i2];
249e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
250e48d15efSToby Isaac             xw[0] = x[vi[j]];
251e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
252e48d15efSToby Isaac           }
253e48d15efSToby Isaac           t[i2] = s[0];
254e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
255e48d15efSToby Isaac           x[i2] = xw[0];
256e48d15efSToby Isaac           idiag += 1;
257e48d15efSToby Isaac           i2 += 1;
258e48d15efSToby Isaac         }
259e48d15efSToby Isaac         break;
260e48d15efSToby Isaac       case 2:
261e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
2629371c9d4SSatish Balay         t[0] = b[0];
2639371c9d4SSatish Balay         t[1] = b[1];
264e48d15efSToby Isaac         i2   = 2;
265e48d15efSToby Isaac         idiag += 4;
266e48d15efSToby Isaac         for (i = 1; i < m; i++) {
267e48d15efSToby Isaac           v    = aa + 4 * ai[i];
268e48d15efSToby Isaac           vi   = aj + ai[i];
269e48d15efSToby Isaac           nz   = diag[i] - ai[i];
2709371c9d4SSatish Balay           s[0] = b[i2];
2719371c9d4SSatish Balay           s[1] = b[i2 + 1];
272e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
273e48d15efSToby Isaac             idx   = 2 * vi[j];
274e48d15efSToby Isaac             it    = 4 * j;
2759371c9d4SSatish Balay             xw[0] = x[idx];
2769371c9d4SSatish Balay             xw[1] = x[1 + idx];
277e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
278e48d15efSToby Isaac           }
2799371c9d4SSatish Balay           t[i2]     = s[0];
2809371c9d4SSatish Balay           t[i2 + 1] = s[1];
281e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
2829371c9d4SSatish Balay           x[i2]     = xw[0];
2839371c9d4SSatish Balay           x[i2 + 1] = xw[1];
284e48d15efSToby Isaac           idiag += 4;
285e48d15efSToby Isaac           i2 += 2;
286e48d15efSToby Isaac         }
287e48d15efSToby Isaac         break;
288e48d15efSToby Isaac       case 3:
289e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
2909371c9d4SSatish Balay         t[0] = b[0];
2919371c9d4SSatish Balay         t[1] = b[1];
2929371c9d4SSatish Balay         t[2] = b[2];
293e48d15efSToby Isaac         i2   = 3;
294e48d15efSToby Isaac         idiag += 9;
295e48d15efSToby Isaac         for (i = 1; i < m; i++) {
296e48d15efSToby Isaac           v    = aa + 9 * ai[i];
297e48d15efSToby Isaac           vi   = aj + ai[i];
298e48d15efSToby Isaac           nz   = diag[i] - ai[i];
2999371c9d4SSatish Balay           s[0] = b[i2];
3009371c9d4SSatish Balay           s[1] = b[i2 + 1];
3019371c9d4SSatish Balay           s[2] = b[i2 + 2];
302e48d15efSToby Isaac           while (nz--) {
303e48d15efSToby Isaac             idx   = 3 * (*vi++);
3049371c9d4SSatish Balay             xw[0] = x[idx];
3059371c9d4SSatish Balay             xw[1] = x[1 + idx];
3069371c9d4SSatish Balay             xw[2] = x[2 + idx];
307e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
308e48d15efSToby Isaac             v += 9;
309e48d15efSToby Isaac           }
3109371c9d4SSatish Balay           t[i2]     = s[0];
3119371c9d4SSatish Balay           t[i2 + 1] = s[1];
3129371c9d4SSatish Balay           t[i2 + 2] = s[2];
313e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
3149371c9d4SSatish Balay           x[i2]     = xw[0];
3159371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3169371c9d4SSatish Balay           x[i2 + 2] = xw[2];
317e48d15efSToby Isaac           idiag += 9;
318e48d15efSToby Isaac           i2 += 3;
319e48d15efSToby Isaac         }
320e48d15efSToby Isaac         break;
321e48d15efSToby Isaac       case 4:
322e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
3239371c9d4SSatish Balay         t[0] = b[0];
3249371c9d4SSatish Balay         t[1] = b[1];
3259371c9d4SSatish Balay         t[2] = b[2];
3269371c9d4SSatish Balay         t[3] = b[3];
327e48d15efSToby Isaac         i2   = 4;
328e48d15efSToby Isaac         idiag += 16;
329e48d15efSToby Isaac         for (i = 1; i < m; i++) {
330e48d15efSToby Isaac           v    = aa + 16 * ai[i];
331e48d15efSToby Isaac           vi   = aj + ai[i];
332e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3339371c9d4SSatish Balay           s[0] = b[i2];
3349371c9d4SSatish Balay           s[1] = b[i2 + 1];
3359371c9d4SSatish Balay           s[2] = b[i2 + 2];
3369371c9d4SSatish Balay           s[3] = b[i2 + 3];
337e48d15efSToby Isaac           while (nz--) {
338e48d15efSToby Isaac             idx   = 4 * (*vi++);
3399371c9d4SSatish Balay             xw[0] = x[idx];
3409371c9d4SSatish Balay             xw[1] = x[1 + idx];
3419371c9d4SSatish Balay             xw[2] = x[2 + idx];
3429371c9d4SSatish Balay             xw[3] = x[3 + idx];
343e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
344e48d15efSToby Isaac             v += 16;
345e48d15efSToby Isaac           }
3469371c9d4SSatish Balay           t[i2]     = s[0];
3479371c9d4SSatish Balay           t[i2 + 1] = s[1];
3489371c9d4SSatish Balay           t[i2 + 2] = s[2];
3499371c9d4SSatish Balay           t[i2 + 3] = s[3];
350e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
3519371c9d4SSatish Balay           x[i2]     = xw[0];
3529371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3539371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3549371c9d4SSatish Balay           x[i2 + 3] = xw[3];
355e48d15efSToby Isaac           idiag += 16;
356e48d15efSToby Isaac           i2 += 4;
357e48d15efSToby Isaac         }
358e48d15efSToby Isaac         break;
359e48d15efSToby Isaac       case 5:
360e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
3619371c9d4SSatish Balay         t[0] = b[0];
3629371c9d4SSatish Balay         t[1] = b[1];
3639371c9d4SSatish Balay         t[2] = b[2];
3649371c9d4SSatish Balay         t[3] = b[3];
3659371c9d4SSatish Balay         t[4] = b[4];
366e48d15efSToby Isaac         i2   = 5;
367e48d15efSToby Isaac         idiag += 25;
368e48d15efSToby Isaac         for (i = 1; i < m; i++) {
369e48d15efSToby Isaac           v    = aa + 25 * ai[i];
370e48d15efSToby Isaac           vi   = aj + ai[i];
371e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3729371c9d4SSatish Balay           s[0] = b[i2];
3739371c9d4SSatish Balay           s[1] = b[i2 + 1];
3749371c9d4SSatish Balay           s[2] = b[i2 + 2];
3759371c9d4SSatish Balay           s[3] = b[i2 + 3];
3769371c9d4SSatish Balay           s[4] = b[i2 + 4];
377e48d15efSToby Isaac           while (nz--) {
378e48d15efSToby Isaac             idx   = 5 * (*vi++);
3799371c9d4SSatish Balay             xw[0] = x[idx];
3809371c9d4SSatish Balay             xw[1] = x[1 + idx];
3819371c9d4SSatish Balay             xw[2] = x[2 + idx];
3829371c9d4SSatish Balay             xw[3] = x[3 + idx];
3839371c9d4SSatish Balay             xw[4] = x[4 + idx];
384e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
385e48d15efSToby Isaac             v += 25;
386e48d15efSToby Isaac           }
3879371c9d4SSatish Balay           t[i2]     = s[0];
3889371c9d4SSatish Balay           t[i2 + 1] = s[1];
3899371c9d4SSatish Balay           t[i2 + 2] = s[2];
3909371c9d4SSatish Balay           t[i2 + 3] = s[3];
3919371c9d4SSatish Balay           t[i2 + 4] = s[4];
392e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
3939371c9d4SSatish Balay           x[i2]     = xw[0];
3949371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3959371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3969371c9d4SSatish Balay           x[i2 + 3] = xw[3];
3979371c9d4SSatish Balay           x[i2 + 4] = xw[4];
398e48d15efSToby Isaac           idiag += 25;
399e48d15efSToby Isaac           i2 += 5;
400e48d15efSToby Isaac         }
401e48d15efSToby Isaac         break;
402e48d15efSToby Isaac       case 6:
403e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
4049371c9d4SSatish Balay         t[0] = b[0];
4059371c9d4SSatish Balay         t[1] = b[1];
4069371c9d4SSatish Balay         t[2] = b[2];
4079371c9d4SSatish Balay         t[3] = b[3];
4089371c9d4SSatish Balay         t[4] = b[4];
4099371c9d4SSatish Balay         t[5] = b[5];
410e48d15efSToby Isaac         i2   = 6;
411e48d15efSToby Isaac         idiag += 36;
412e48d15efSToby Isaac         for (i = 1; i < m; i++) {
413e48d15efSToby Isaac           v    = aa + 36 * ai[i];
414e48d15efSToby Isaac           vi   = aj + ai[i];
415e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4169371c9d4SSatish Balay           s[0] = b[i2];
4179371c9d4SSatish Balay           s[1] = b[i2 + 1];
4189371c9d4SSatish Balay           s[2] = b[i2 + 2];
4199371c9d4SSatish Balay           s[3] = b[i2 + 3];
4209371c9d4SSatish Balay           s[4] = b[i2 + 4];
4219371c9d4SSatish Balay           s[5] = b[i2 + 5];
422e48d15efSToby Isaac           while (nz--) {
423e48d15efSToby Isaac             idx   = 6 * (*vi++);
4249371c9d4SSatish Balay             xw[0] = x[idx];
4259371c9d4SSatish Balay             xw[1] = x[1 + idx];
4269371c9d4SSatish Balay             xw[2] = x[2 + idx];
4279371c9d4SSatish Balay             xw[3] = x[3 + idx];
4289371c9d4SSatish Balay             xw[4] = x[4 + idx];
4299371c9d4SSatish Balay             xw[5] = x[5 + idx];
430e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
431e48d15efSToby Isaac             v += 36;
432e48d15efSToby Isaac           }
4339371c9d4SSatish Balay           t[i2]     = s[0];
4349371c9d4SSatish Balay           t[i2 + 1] = s[1];
4359371c9d4SSatish Balay           t[i2 + 2] = s[2];
4369371c9d4SSatish Balay           t[i2 + 3] = s[3];
4379371c9d4SSatish Balay           t[i2 + 4] = s[4];
4389371c9d4SSatish Balay           t[i2 + 5] = s[5];
439e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
4409371c9d4SSatish Balay           x[i2]     = xw[0];
4419371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4429371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4439371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4449371c9d4SSatish Balay           x[i2 + 4] = xw[4];
4459371c9d4SSatish Balay           x[i2 + 5] = xw[5];
446e48d15efSToby Isaac           idiag += 36;
447e48d15efSToby Isaac           i2 += 6;
448e48d15efSToby Isaac         }
449e48d15efSToby Isaac         break;
450e48d15efSToby Isaac       case 7:
451e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
4529371c9d4SSatish Balay         t[0] = b[0];
4539371c9d4SSatish Balay         t[1] = b[1];
4549371c9d4SSatish Balay         t[2] = b[2];
4559371c9d4SSatish Balay         t[3] = b[3];
4569371c9d4SSatish Balay         t[4] = b[4];
4579371c9d4SSatish Balay         t[5] = b[5];
4589371c9d4SSatish Balay         t[6] = b[6];
459e48d15efSToby Isaac         i2   = 7;
460e48d15efSToby Isaac         idiag += 49;
461e48d15efSToby Isaac         for (i = 1; i < m; i++) {
462e48d15efSToby Isaac           v    = aa + 49 * ai[i];
463e48d15efSToby Isaac           vi   = aj + ai[i];
464e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4659371c9d4SSatish Balay           s[0] = b[i2];
4669371c9d4SSatish Balay           s[1] = b[i2 + 1];
4679371c9d4SSatish Balay           s[2] = b[i2 + 2];
4689371c9d4SSatish Balay           s[3] = b[i2 + 3];
4699371c9d4SSatish Balay           s[4] = b[i2 + 4];
4709371c9d4SSatish Balay           s[5] = b[i2 + 5];
4719371c9d4SSatish Balay           s[6] = b[i2 + 6];
472e48d15efSToby Isaac           while (nz--) {
473e48d15efSToby Isaac             idx   = 7 * (*vi++);
4749371c9d4SSatish Balay             xw[0] = x[idx];
4759371c9d4SSatish Balay             xw[1] = x[1 + idx];
4769371c9d4SSatish Balay             xw[2] = x[2 + idx];
4779371c9d4SSatish Balay             xw[3] = x[3 + idx];
4789371c9d4SSatish Balay             xw[4] = x[4 + idx];
4799371c9d4SSatish Balay             xw[5] = x[5 + idx];
4809371c9d4SSatish Balay             xw[6] = x[6 + idx];
481e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
482e48d15efSToby Isaac             v += 49;
483e48d15efSToby Isaac           }
4849371c9d4SSatish Balay           t[i2]     = s[0];
4859371c9d4SSatish Balay           t[i2 + 1] = s[1];
4869371c9d4SSatish Balay           t[i2 + 2] = s[2];
4879371c9d4SSatish Balay           t[i2 + 3] = s[3];
4889371c9d4SSatish Balay           t[i2 + 4] = s[4];
4899371c9d4SSatish Balay           t[i2 + 5] = s[5];
4909371c9d4SSatish Balay           t[i2 + 6] = s[6];
491e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
4929371c9d4SSatish Balay           x[i2]     = xw[0];
4939371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4949371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4959371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4969371c9d4SSatish Balay           x[i2 + 4] = xw[4];
4979371c9d4SSatish Balay           x[i2 + 5] = xw[5];
4989371c9d4SSatish Balay           x[i2 + 6] = xw[6];
499e48d15efSToby Isaac           idiag += 49;
500e48d15efSToby Isaac           i2 += 7;
501e48d15efSToby Isaac         }
502e48d15efSToby Isaac         break;
503e48d15efSToby Isaac       default:
50496b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
5059566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(t, b, bs));
506de80f912SBarry Smith         i2 = bs;
507de80f912SBarry Smith         idiag += bs2;
508de80f912SBarry Smith         for (i = 1; i < m; i++) {
509de80f912SBarry Smith           v  = aa + bs2 * ai[i];
510de80f912SBarry Smith           vi = aj + ai[i];
511de80f912SBarry Smith           nz = diag[i] - ai[i];
512de80f912SBarry Smith 
5139566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
514de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
515de80f912SBarry Smith           workt = work;
516de80f912SBarry Smith           for (j = 0; j < nz; j++) {
5179566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
518de80f912SBarry Smith             workt += bs;
519de80f912SBarry Smith           }
52096b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
5219566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(t + i2, w, bs));
52296b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
523de80f912SBarry Smith 
524de80f912SBarry Smith           idiag += bs2;
525de80f912SBarry Smith           i2 += bs;
526de80f912SBarry Smith         }
527e48d15efSToby Isaac         break;
528e48d15efSToby Isaac       }
529de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
5309566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
531e48d15efSToby Isaac       xb = t;
5329371c9d4SSatish Balay     } else xb = b;
533de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
534e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
535e48d15efSToby Isaac       i2    = bs * (m - 1);
536e48d15efSToby Isaac       switch (bs) {
537e48d15efSToby Isaac       case 1:
538e48d15efSToby Isaac         s[0] = xb[i2];
539e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
540e48d15efSToby Isaac         x[i2] = xw[0];
541e48d15efSToby Isaac         i2 -= 1;
542e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
543e48d15efSToby Isaac           v    = aa + (diag[i] + 1);
544e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
545e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
546e48d15efSToby Isaac           s[0] = xb[i2];
547e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
548e48d15efSToby Isaac             xw[0] = x[vi[j]];
549e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
550e48d15efSToby Isaac           }
551e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
552e48d15efSToby Isaac           x[i2] = xw[0];
553e48d15efSToby Isaac           idiag -= 1;
554e48d15efSToby Isaac           i2 -= 1;
555e48d15efSToby Isaac         }
556e48d15efSToby Isaac         break;
557e48d15efSToby Isaac       case 2:
5589371c9d4SSatish Balay         s[0] = xb[i2];
5599371c9d4SSatish Balay         s[1] = xb[i2 + 1];
560e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5619371c9d4SSatish Balay         x[i2]     = xw[0];
5629371c9d4SSatish Balay         x[i2 + 1] = xw[1];
563e48d15efSToby Isaac         i2 -= 2;
564e48d15efSToby Isaac         idiag -= 4;
565e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
566e48d15efSToby Isaac           v    = aa + 4 * (diag[i] + 1);
567e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
568e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
5699371c9d4SSatish Balay           s[0] = xb[i2];
5709371c9d4SSatish Balay           s[1] = xb[i2 + 1];
571e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
572e48d15efSToby Isaac             idx   = 2 * vi[j];
573e48d15efSToby Isaac             it    = 4 * j;
5749371c9d4SSatish Balay             xw[0] = x[idx];
5759371c9d4SSatish Balay             xw[1] = x[1 + idx];
576e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
577e48d15efSToby Isaac           }
578e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5799371c9d4SSatish Balay           x[i2]     = xw[0];
5809371c9d4SSatish Balay           x[i2 + 1] = xw[1];
581e48d15efSToby Isaac           idiag -= 4;
582e48d15efSToby Isaac           i2 -= 2;
583e48d15efSToby Isaac         }
584e48d15efSToby Isaac         break;
585e48d15efSToby Isaac       case 3:
5869371c9d4SSatish Balay         s[0] = xb[i2];
5879371c9d4SSatish Balay         s[1] = xb[i2 + 1];
5889371c9d4SSatish Balay         s[2] = xb[i2 + 2];
589e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
5909371c9d4SSatish Balay         x[i2]     = xw[0];
5919371c9d4SSatish Balay         x[i2 + 1] = xw[1];
5929371c9d4SSatish Balay         x[i2 + 2] = xw[2];
593e48d15efSToby Isaac         i2 -= 3;
594e48d15efSToby Isaac         idiag -= 9;
595e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
596e48d15efSToby Isaac           v    = aa + 9 * (diag[i] + 1);
597e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
598e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
5999371c9d4SSatish Balay           s[0] = xb[i2];
6009371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6019371c9d4SSatish Balay           s[2] = xb[i2 + 2];
602e48d15efSToby Isaac           while (nz--) {
603e48d15efSToby Isaac             idx   = 3 * (*vi++);
6049371c9d4SSatish Balay             xw[0] = x[idx];
6059371c9d4SSatish Balay             xw[1] = x[1 + idx];
6069371c9d4SSatish Balay             xw[2] = x[2 + idx];
607e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
608e48d15efSToby Isaac             v += 9;
609e48d15efSToby Isaac           }
610e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6119371c9d4SSatish Balay           x[i2]     = xw[0];
6129371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6139371c9d4SSatish Balay           x[i2 + 2] = xw[2];
614e48d15efSToby Isaac           idiag -= 9;
615e48d15efSToby Isaac           i2 -= 3;
616e48d15efSToby Isaac         }
617e48d15efSToby Isaac         break;
618e48d15efSToby Isaac       case 4:
6199371c9d4SSatish Balay         s[0] = xb[i2];
6209371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6219371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6229371c9d4SSatish Balay         s[3] = xb[i2 + 3];
623e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6249371c9d4SSatish Balay         x[i2]     = xw[0];
6259371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6269371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6279371c9d4SSatish Balay         x[i2 + 3] = xw[3];
628e48d15efSToby Isaac         i2 -= 4;
629e48d15efSToby Isaac         idiag -= 16;
630e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
631e48d15efSToby Isaac           v    = aa + 16 * (diag[i] + 1);
632e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
633e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6349371c9d4SSatish Balay           s[0] = xb[i2];
6359371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6369371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6379371c9d4SSatish Balay           s[3] = xb[i2 + 3];
638e48d15efSToby Isaac           while (nz--) {
639e48d15efSToby Isaac             idx   = 4 * (*vi++);
6409371c9d4SSatish Balay             xw[0] = x[idx];
6419371c9d4SSatish Balay             xw[1] = x[1 + idx];
6429371c9d4SSatish Balay             xw[2] = x[2 + idx];
6439371c9d4SSatish Balay             xw[3] = x[3 + idx];
644e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
645e48d15efSToby Isaac             v += 16;
646e48d15efSToby Isaac           }
647e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6489371c9d4SSatish Balay           x[i2]     = xw[0];
6499371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6509371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6519371c9d4SSatish Balay           x[i2 + 3] = xw[3];
652e48d15efSToby Isaac           idiag -= 16;
653e48d15efSToby Isaac           i2 -= 4;
654e48d15efSToby Isaac         }
655e48d15efSToby Isaac         break;
656e48d15efSToby Isaac       case 5:
6579371c9d4SSatish Balay         s[0] = xb[i2];
6589371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6599371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6609371c9d4SSatish Balay         s[3] = xb[i2 + 3];
6619371c9d4SSatish Balay         s[4] = xb[i2 + 4];
662e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6639371c9d4SSatish Balay         x[i2]     = xw[0];
6649371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6659371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6669371c9d4SSatish Balay         x[i2 + 3] = xw[3];
6679371c9d4SSatish Balay         x[i2 + 4] = xw[4];
668e48d15efSToby Isaac         i2 -= 5;
669e48d15efSToby Isaac         idiag -= 25;
670e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
671e48d15efSToby Isaac           v    = aa + 25 * (diag[i] + 1);
672e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
673e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6749371c9d4SSatish Balay           s[0] = xb[i2];
6759371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6769371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6779371c9d4SSatish Balay           s[3] = xb[i2 + 3];
6789371c9d4SSatish Balay           s[4] = xb[i2 + 4];
679e48d15efSToby Isaac           while (nz--) {
680e48d15efSToby Isaac             idx   = 5 * (*vi++);
6819371c9d4SSatish Balay             xw[0] = x[idx];
6829371c9d4SSatish Balay             xw[1] = x[1 + idx];
6839371c9d4SSatish Balay             xw[2] = x[2 + idx];
6849371c9d4SSatish Balay             xw[3] = x[3 + idx];
6859371c9d4SSatish Balay             xw[4] = x[4 + idx];
686e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
687e48d15efSToby Isaac             v += 25;
688e48d15efSToby Isaac           }
689e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6909371c9d4SSatish Balay           x[i2]     = xw[0];
6919371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6929371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6939371c9d4SSatish Balay           x[i2 + 3] = xw[3];
6949371c9d4SSatish Balay           x[i2 + 4] = xw[4];
695e48d15efSToby Isaac           idiag -= 25;
696e48d15efSToby Isaac           i2 -= 5;
697e48d15efSToby Isaac         }
698e48d15efSToby Isaac         break;
699e48d15efSToby Isaac       case 6:
7009371c9d4SSatish Balay         s[0] = xb[i2];
7019371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7029371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7039371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7049371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7059371c9d4SSatish Balay         s[5] = xb[i2 + 5];
706e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7079371c9d4SSatish Balay         x[i2]     = xw[0];
7089371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7099371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7109371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7119371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7129371c9d4SSatish Balay         x[i2 + 5] = xw[5];
713e48d15efSToby Isaac         i2 -= 6;
714e48d15efSToby Isaac         idiag -= 36;
715e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
716e48d15efSToby Isaac           v    = aa + 36 * (diag[i] + 1);
717e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
718e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7199371c9d4SSatish Balay           s[0] = xb[i2];
7209371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7219371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7229371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7239371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7249371c9d4SSatish Balay           s[5] = xb[i2 + 5];
725e48d15efSToby Isaac           while (nz--) {
726e48d15efSToby Isaac             idx   = 6 * (*vi++);
7279371c9d4SSatish Balay             xw[0] = x[idx];
7289371c9d4SSatish Balay             xw[1] = x[1 + idx];
7299371c9d4SSatish Balay             xw[2] = x[2 + idx];
7309371c9d4SSatish Balay             xw[3] = x[3 + idx];
7319371c9d4SSatish Balay             xw[4] = x[4 + idx];
7329371c9d4SSatish Balay             xw[5] = x[5 + idx];
733e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
734e48d15efSToby Isaac             v += 36;
735e48d15efSToby Isaac           }
736e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7379371c9d4SSatish Balay           x[i2]     = xw[0];
7389371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7399371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7409371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7419371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7429371c9d4SSatish Balay           x[i2 + 5] = xw[5];
743e48d15efSToby Isaac           idiag -= 36;
744e48d15efSToby Isaac           i2 -= 6;
745e48d15efSToby Isaac         }
746e48d15efSToby Isaac         break;
747e48d15efSToby Isaac       case 7:
7489371c9d4SSatish Balay         s[0] = xb[i2];
7499371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7509371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7519371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7529371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7539371c9d4SSatish Balay         s[5] = xb[i2 + 5];
7549371c9d4SSatish Balay         s[6] = xb[i2 + 6];
755e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
7569371c9d4SSatish Balay         x[i2]     = xw[0];
7579371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7589371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7599371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7609371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7619371c9d4SSatish Balay         x[i2 + 5] = xw[5];
7629371c9d4SSatish Balay         x[i2 + 6] = xw[6];
763e48d15efSToby Isaac         i2 -= 7;
764e48d15efSToby Isaac         idiag -= 49;
765e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
766e48d15efSToby Isaac           v    = aa + 49 * (diag[i] + 1);
767e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
768e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7699371c9d4SSatish Balay           s[0] = xb[i2];
7709371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7719371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7729371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7739371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7749371c9d4SSatish Balay           s[5] = xb[i2 + 5];
7759371c9d4SSatish Balay           s[6] = xb[i2 + 6];
776e48d15efSToby Isaac           while (nz--) {
777e48d15efSToby Isaac             idx   = 7 * (*vi++);
7789371c9d4SSatish Balay             xw[0] = x[idx];
7799371c9d4SSatish Balay             xw[1] = x[1 + idx];
7809371c9d4SSatish Balay             xw[2] = x[2 + idx];
7819371c9d4SSatish Balay             xw[3] = x[3 + idx];
7829371c9d4SSatish Balay             xw[4] = x[4 + idx];
7839371c9d4SSatish Balay             xw[5] = x[5 + idx];
7849371c9d4SSatish Balay             xw[6] = x[6 + idx];
785e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
786e48d15efSToby Isaac             v += 49;
787e48d15efSToby Isaac           }
788e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
7899371c9d4SSatish Balay           x[i2]     = xw[0];
7909371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7919371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7929371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7939371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7949371c9d4SSatish Balay           x[i2 + 5] = xw[5];
7959371c9d4SSatish Balay           x[i2 + 6] = xw[6];
796e48d15efSToby Isaac           idiag -= 49;
797e48d15efSToby Isaac           i2 -= 7;
798e48d15efSToby Isaac         }
799e48d15efSToby Isaac         break;
800e48d15efSToby Isaac       default:
8019566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(w, xb + i2, bs));
80296b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
803de80f912SBarry Smith         i2 -= bs;
804e48d15efSToby Isaac         idiag -= bs2;
805de80f912SBarry Smith         for (i = m - 2; i >= 0; i--) {
806de80f912SBarry Smith           v  = aa + bs2 * (diag[i] + 1);
807de80f912SBarry Smith           vi = aj + diag[i] + 1;
808de80f912SBarry Smith           nz = ai[i + 1] - diag[i] - 1;
809de80f912SBarry Smith 
8109566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, xb + i2, bs));
811de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
812de80f912SBarry Smith           workt = work;
813de80f912SBarry Smith           for (j = 0; j < nz; j++) {
8149566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
815de80f912SBarry Smith             workt += bs;
816de80f912SBarry Smith           }
81796b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
81896b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
819e48d15efSToby Isaac 
820de80f912SBarry Smith           idiag -= bs2;
821de80f912SBarry Smith           i2 -= bs;
822de80f912SBarry Smith         }
823e48d15efSToby Isaac         break;
824e48d15efSToby Isaac       }
8259566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
826de80f912SBarry Smith     }
827e48d15efSToby Isaac     its--;
828e48d15efSToby Isaac   }
829e48d15efSToby Isaac   while (its--) {
830e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
831e48d15efSToby Isaac       idiag = a->idiag;
832e48d15efSToby Isaac       i2    = 0;
833e48d15efSToby Isaac       switch (bs) {
834e48d15efSToby Isaac       case 1:
835e48d15efSToby Isaac         for (i = 0; i < m; i++) {
836e48d15efSToby Isaac           v    = aa + ai[i];
837e48d15efSToby Isaac           vi   = aj + ai[i];
838e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
839e48d15efSToby Isaac           s[0] = b[i2];
840e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
841e48d15efSToby Isaac             xw[0] = x[vi[j]];
842e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
843e48d15efSToby Isaac           }
844e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
845e48d15efSToby Isaac           x[i2] += xw[0];
846e48d15efSToby Isaac           idiag += 1;
847e48d15efSToby Isaac           i2 += 1;
848e48d15efSToby Isaac         }
849e48d15efSToby Isaac         break;
850e48d15efSToby Isaac       case 2:
851e48d15efSToby Isaac         for (i = 0; i < m; i++) {
852e48d15efSToby Isaac           v    = aa + 4 * ai[i];
853e48d15efSToby Isaac           vi   = aj + ai[i];
854e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8559371c9d4SSatish Balay           s[0] = b[i2];
8569371c9d4SSatish Balay           s[1] = b[i2 + 1];
857e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
858e48d15efSToby Isaac             idx   = 2 * vi[j];
859e48d15efSToby Isaac             it    = 4 * j;
8609371c9d4SSatish Balay             xw[0] = x[idx];
8619371c9d4SSatish Balay             xw[1] = x[1 + idx];
862e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
863e48d15efSToby Isaac           }
864e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
8659371c9d4SSatish Balay           x[i2] += xw[0];
8669371c9d4SSatish Balay           x[i2 + 1] += xw[1];
867e48d15efSToby Isaac           idiag += 4;
868e48d15efSToby Isaac           i2 += 2;
869e48d15efSToby Isaac         }
870e48d15efSToby Isaac         break;
871e48d15efSToby Isaac       case 3:
872e48d15efSToby Isaac         for (i = 0; i < m; i++) {
873e48d15efSToby Isaac           v    = aa + 9 * ai[i];
874e48d15efSToby Isaac           vi   = aj + ai[i];
875e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8769371c9d4SSatish Balay           s[0] = b[i2];
8779371c9d4SSatish Balay           s[1] = b[i2 + 1];
8789371c9d4SSatish Balay           s[2] = b[i2 + 2];
879e48d15efSToby Isaac           while (nz--) {
880e48d15efSToby Isaac             idx   = 3 * (*vi++);
8819371c9d4SSatish Balay             xw[0] = x[idx];
8829371c9d4SSatish Balay             xw[1] = x[1 + idx];
8839371c9d4SSatish Balay             xw[2] = x[2 + idx];
884e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
885e48d15efSToby Isaac             v += 9;
886e48d15efSToby Isaac           }
887e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
8889371c9d4SSatish Balay           x[i2] += xw[0];
8899371c9d4SSatish Balay           x[i2 + 1] += xw[1];
8909371c9d4SSatish Balay           x[i2 + 2] += xw[2];
891e48d15efSToby Isaac           idiag += 9;
892e48d15efSToby Isaac           i2 += 3;
893e48d15efSToby Isaac         }
894e48d15efSToby Isaac         break;
895e48d15efSToby Isaac       case 4:
896e48d15efSToby Isaac         for (i = 0; i < m; i++) {
897e48d15efSToby Isaac           v    = aa + 16 * ai[i];
898e48d15efSToby Isaac           vi   = aj + ai[i];
899e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9009371c9d4SSatish Balay           s[0] = b[i2];
9019371c9d4SSatish Balay           s[1] = b[i2 + 1];
9029371c9d4SSatish Balay           s[2] = b[i2 + 2];
9039371c9d4SSatish Balay           s[3] = b[i2 + 3];
904e48d15efSToby Isaac           while (nz--) {
905e48d15efSToby Isaac             idx   = 4 * (*vi++);
9069371c9d4SSatish Balay             xw[0] = x[idx];
9079371c9d4SSatish Balay             xw[1] = x[1 + idx];
9089371c9d4SSatish Balay             xw[2] = x[2 + idx];
9099371c9d4SSatish Balay             xw[3] = x[3 + idx];
910e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
911e48d15efSToby Isaac             v += 16;
912e48d15efSToby Isaac           }
913e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
9149371c9d4SSatish Balay           x[i2] += xw[0];
9159371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9169371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9179371c9d4SSatish Balay           x[i2 + 3] += xw[3];
918e48d15efSToby Isaac           idiag += 16;
919e48d15efSToby Isaac           i2 += 4;
920e48d15efSToby Isaac         }
921e48d15efSToby Isaac         break;
922e48d15efSToby Isaac       case 5:
923e48d15efSToby Isaac         for (i = 0; i < m; i++) {
924e48d15efSToby Isaac           v    = aa + 25 * ai[i];
925e48d15efSToby Isaac           vi   = aj + ai[i];
926e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9279371c9d4SSatish Balay           s[0] = b[i2];
9289371c9d4SSatish Balay           s[1] = b[i2 + 1];
9299371c9d4SSatish Balay           s[2] = b[i2 + 2];
9309371c9d4SSatish Balay           s[3] = b[i2 + 3];
9319371c9d4SSatish Balay           s[4] = b[i2 + 4];
932e48d15efSToby Isaac           while (nz--) {
933e48d15efSToby Isaac             idx   = 5 * (*vi++);
9349371c9d4SSatish Balay             xw[0] = x[idx];
9359371c9d4SSatish Balay             xw[1] = x[1 + idx];
9369371c9d4SSatish Balay             xw[2] = x[2 + idx];
9379371c9d4SSatish Balay             xw[3] = x[3 + idx];
9389371c9d4SSatish Balay             xw[4] = x[4 + idx];
939e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
940e48d15efSToby Isaac             v += 25;
941e48d15efSToby Isaac           }
942e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
9439371c9d4SSatish Balay           x[i2] += xw[0];
9449371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9459371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9469371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9479371c9d4SSatish Balay           x[i2 + 4] += xw[4];
948e48d15efSToby Isaac           idiag += 25;
949e48d15efSToby Isaac           i2 += 5;
950e48d15efSToby Isaac         }
951e48d15efSToby Isaac         break;
952e48d15efSToby Isaac       case 6:
953e48d15efSToby Isaac         for (i = 0; i < m; i++) {
954e48d15efSToby Isaac           v    = aa + 36 * ai[i];
955e48d15efSToby Isaac           vi   = aj + ai[i];
956e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9579371c9d4SSatish Balay           s[0] = b[i2];
9589371c9d4SSatish Balay           s[1] = b[i2 + 1];
9599371c9d4SSatish Balay           s[2] = b[i2 + 2];
9609371c9d4SSatish Balay           s[3] = b[i2 + 3];
9619371c9d4SSatish Balay           s[4] = b[i2 + 4];
9629371c9d4SSatish Balay           s[5] = b[i2 + 5];
963e48d15efSToby Isaac           while (nz--) {
964e48d15efSToby Isaac             idx   = 6 * (*vi++);
9659371c9d4SSatish Balay             xw[0] = x[idx];
9669371c9d4SSatish Balay             xw[1] = x[1 + idx];
9679371c9d4SSatish Balay             xw[2] = x[2 + idx];
9689371c9d4SSatish Balay             xw[3] = x[3 + idx];
9699371c9d4SSatish Balay             xw[4] = x[4 + idx];
9709371c9d4SSatish Balay             xw[5] = x[5 + idx];
971e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
972e48d15efSToby Isaac             v += 36;
973e48d15efSToby Isaac           }
974e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
9759371c9d4SSatish Balay           x[i2] += xw[0];
9769371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9779371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9789371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9799371c9d4SSatish Balay           x[i2 + 4] += xw[4];
9809371c9d4SSatish Balay           x[i2 + 5] += xw[5];
981e48d15efSToby Isaac           idiag += 36;
982e48d15efSToby Isaac           i2 += 6;
983e48d15efSToby Isaac         }
984e48d15efSToby Isaac         break;
985e48d15efSToby Isaac       case 7:
986e48d15efSToby Isaac         for (i = 0; i < m; i++) {
987e48d15efSToby Isaac           v    = aa + 49 * ai[i];
988e48d15efSToby Isaac           vi   = aj + ai[i];
989e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9909371c9d4SSatish Balay           s[0] = b[i2];
9919371c9d4SSatish Balay           s[1] = b[i2 + 1];
9929371c9d4SSatish Balay           s[2] = b[i2 + 2];
9939371c9d4SSatish Balay           s[3] = b[i2 + 3];
9949371c9d4SSatish Balay           s[4] = b[i2 + 4];
9959371c9d4SSatish Balay           s[5] = b[i2 + 5];
9969371c9d4SSatish Balay           s[6] = b[i2 + 6];
997e48d15efSToby Isaac           while (nz--) {
998e48d15efSToby Isaac             idx   = 7 * (*vi++);
9999371c9d4SSatish Balay             xw[0] = x[idx];
10009371c9d4SSatish Balay             xw[1] = x[1 + idx];
10019371c9d4SSatish Balay             xw[2] = x[2 + idx];
10029371c9d4SSatish Balay             xw[3] = x[3 + idx];
10039371c9d4SSatish Balay             xw[4] = x[4 + idx];
10049371c9d4SSatish Balay             xw[5] = x[5 + idx];
10059371c9d4SSatish Balay             xw[6] = x[6 + idx];
1006e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1007e48d15efSToby Isaac             v += 49;
1008e48d15efSToby Isaac           }
1009e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
10109371c9d4SSatish Balay           x[i2] += xw[0];
10119371c9d4SSatish Balay           x[i2 + 1] += xw[1];
10129371c9d4SSatish Balay           x[i2 + 2] += xw[2];
10139371c9d4SSatish Balay           x[i2 + 3] += xw[3];
10149371c9d4SSatish Balay           x[i2 + 4] += xw[4];
10159371c9d4SSatish Balay           x[i2 + 5] += xw[5];
10169371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1017e48d15efSToby Isaac           idiag += 49;
1018e48d15efSToby Isaac           i2 += 7;
1019e48d15efSToby Isaac         }
1020e48d15efSToby Isaac         break;
1021e48d15efSToby Isaac       default:
1022e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1023e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1024e48d15efSToby Isaac           vi = aj + ai[i];
1025e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1026e48d15efSToby Isaac 
10279566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1028e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1029e48d15efSToby Isaac           workt = work;
1030e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
10319566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1032e48d15efSToby Isaac             workt += bs;
1033e48d15efSToby Isaac           }
1034e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1035e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1036e48d15efSToby Isaac 
1037e48d15efSToby Isaac           idiag += bs2;
1038e48d15efSToby Isaac           i2 += bs;
1039e48d15efSToby Isaac         }
1040e48d15efSToby Isaac         break;
1041e48d15efSToby Isaac       }
10429566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1043e48d15efSToby Isaac     }
1044e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1045e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
1046e48d15efSToby Isaac       i2    = bs * (m - 1);
1047e48d15efSToby Isaac       switch (bs) {
1048e48d15efSToby Isaac       case 1:
1049e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1050e48d15efSToby Isaac           v    = aa + ai[i];
1051e48d15efSToby Isaac           vi   = aj + ai[i];
1052e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
1053e48d15efSToby Isaac           s[0] = b[i2];
1054e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1055e48d15efSToby Isaac             xw[0] = x[vi[j]];
1056e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1057e48d15efSToby Isaac           }
1058e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1059e48d15efSToby Isaac           x[i2] += xw[0];
1060e48d15efSToby Isaac           idiag -= 1;
1061e48d15efSToby Isaac           i2 -= 1;
1062e48d15efSToby Isaac         }
1063e48d15efSToby Isaac         break;
1064e48d15efSToby Isaac       case 2:
1065e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1066e48d15efSToby Isaac           v    = aa + 4 * ai[i];
1067e48d15efSToby Isaac           vi   = aj + ai[i];
1068e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10699371c9d4SSatish Balay           s[0] = b[i2];
10709371c9d4SSatish Balay           s[1] = b[i2 + 1];
1071e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1072e48d15efSToby Isaac             idx   = 2 * vi[j];
1073e48d15efSToby Isaac             it    = 4 * j;
10749371c9d4SSatish Balay             xw[0] = x[idx];
10759371c9d4SSatish Balay             xw[1] = x[1 + idx];
1076e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1077e48d15efSToby Isaac           }
1078e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
10799371c9d4SSatish Balay           x[i2] += xw[0];
10809371c9d4SSatish Balay           x[i2 + 1] += xw[1];
1081e48d15efSToby Isaac           idiag -= 4;
1082e48d15efSToby Isaac           i2 -= 2;
1083e48d15efSToby Isaac         }
1084e48d15efSToby Isaac         break;
1085e48d15efSToby Isaac       case 3:
1086e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1087e48d15efSToby Isaac           v    = aa + 9 * ai[i];
1088e48d15efSToby Isaac           vi   = aj + ai[i];
1089e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10909371c9d4SSatish Balay           s[0] = b[i2];
10919371c9d4SSatish Balay           s[1] = b[i2 + 1];
10929371c9d4SSatish Balay           s[2] = b[i2 + 2];
1093e48d15efSToby Isaac           while (nz--) {
1094e48d15efSToby Isaac             idx   = 3 * (*vi++);
10959371c9d4SSatish Balay             xw[0] = x[idx];
10969371c9d4SSatish Balay             xw[1] = x[1 + idx];
10979371c9d4SSatish Balay             xw[2] = x[2 + idx];
1098e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1099e48d15efSToby Isaac             v += 9;
1100e48d15efSToby Isaac           }
1101e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
11029371c9d4SSatish Balay           x[i2] += xw[0];
11039371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11049371c9d4SSatish Balay           x[i2 + 2] += xw[2];
1105e48d15efSToby Isaac           idiag -= 9;
1106e48d15efSToby Isaac           i2 -= 3;
1107e48d15efSToby Isaac         }
1108e48d15efSToby Isaac         break;
1109e48d15efSToby Isaac       case 4:
1110e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1111e48d15efSToby Isaac           v    = aa + 16 * ai[i];
1112e48d15efSToby Isaac           vi   = aj + ai[i];
1113e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11149371c9d4SSatish Balay           s[0] = b[i2];
11159371c9d4SSatish Balay           s[1] = b[i2 + 1];
11169371c9d4SSatish Balay           s[2] = b[i2 + 2];
11179371c9d4SSatish Balay           s[3] = b[i2 + 3];
1118e48d15efSToby Isaac           while (nz--) {
1119e48d15efSToby Isaac             idx   = 4 * (*vi++);
11209371c9d4SSatish Balay             xw[0] = x[idx];
11219371c9d4SSatish Balay             xw[1] = x[1 + idx];
11229371c9d4SSatish Balay             xw[2] = x[2 + idx];
11239371c9d4SSatish Balay             xw[3] = x[3 + idx];
1124e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1125e48d15efSToby Isaac             v += 16;
1126e48d15efSToby Isaac           }
1127e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
11289371c9d4SSatish Balay           x[i2] += xw[0];
11299371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11309371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11319371c9d4SSatish Balay           x[i2 + 3] += xw[3];
1132e48d15efSToby Isaac           idiag -= 16;
1133e48d15efSToby Isaac           i2 -= 4;
1134e48d15efSToby Isaac         }
1135e48d15efSToby Isaac         break;
1136e48d15efSToby Isaac       case 5:
1137e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1138e48d15efSToby Isaac           v    = aa + 25 * ai[i];
1139e48d15efSToby Isaac           vi   = aj + ai[i];
1140e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11419371c9d4SSatish Balay           s[0] = b[i2];
11429371c9d4SSatish Balay           s[1] = b[i2 + 1];
11439371c9d4SSatish Balay           s[2] = b[i2 + 2];
11449371c9d4SSatish Balay           s[3] = b[i2 + 3];
11459371c9d4SSatish Balay           s[4] = b[i2 + 4];
1146e48d15efSToby Isaac           while (nz--) {
1147e48d15efSToby Isaac             idx   = 5 * (*vi++);
11489371c9d4SSatish Balay             xw[0] = x[idx];
11499371c9d4SSatish Balay             xw[1] = x[1 + idx];
11509371c9d4SSatish Balay             xw[2] = x[2 + idx];
11519371c9d4SSatish Balay             xw[3] = x[3 + idx];
11529371c9d4SSatish Balay             xw[4] = x[4 + idx];
1153e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1154e48d15efSToby Isaac             v += 25;
1155e48d15efSToby Isaac           }
1156e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
11579371c9d4SSatish Balay           x[i2] += xw[0];
11589371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11599371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11609371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11619371c9d4SSatish Balay           x[i2 + 4] += xw[4];
1162e48d15efSToby Isaac           idiag -= 25;
1163e48d15efSToby Isaac           i2 -= 5;
1164e48d15efSToby Isaac         }
1165e48d15efSToby Isaac         break;
1166e48d15efSToby Isaac       case 6:
1167e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1168e48d15efSToby Isaac           v    = aa + 36 * ai[i];
1169e48d15efSToby Isaac           vi   = aj + ai[i];
1170e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11719371c9d4SSatish Balay           s[0] = b[i2];
11729371c9d4SSatish Balay           s[1] = b[i2 + 1];
11739371c9d4SSatish Balay           s[2] = b[i2 + 2];
11749371c9d4SSatish Balay           s[3] = b[i2 + 3];
11759371c9d4SSatish Balay           s[4] = b[i2 + 4];
11769371c9d4SSatish Balay           s[5] = b[i2 + 5];
1177e48d15efSToby Isaac           while (nz--) {
1178e48d15efSToby Isaac             idx   = 6 * (*vi++);
11799371c9d4SSatish Balay             xw[0] = x[idx];
11809371c9d4SSatish Balay             xw[1] = x[1 + idx];
11819371c9d4SSatish Balay             xw[2] = x[2 + idx];
11829371c9d4SSatish Balay             xw[3] = x[3 + idx];
11839371c9d4SSatish Balay             xw[4] = x[4 + idx];
11849371c9d4SSatish Balay             xw[5] = x[5 + idx];
1185e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1186e48d15efSToby Isaac             v += 36;
1187e48d15efSToby Isaac           }
1188e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
11899371c9d4SSatish Balay           x[i2] += xw[0];
11909371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11919371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11929371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11939371c9d4SSatish Balay           x[i2 + 4] += xw[4];
11949371c9d4SSatish Balay           x[i2 + 5] += xw[5];
1195e48d15efSToby Isaac           idiag -= 36;
1196e48d15efSToby Isaac           i2 -= 6;
1197e48d15efSToby Isaac         }
1198e48d15efSToby Isaac         break;
1199e48d15efSToby Isaac       case 7:
1200e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1201e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1202e48d15efSToby Isaac           vi   = aj + ai[i];
1203e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
12049371c9d4SSatish Balay           s[0] = b[i2];
12059371c9d4SSatish Balay           s[1] = b[i2 + 1];
12069371c9d4SSatish Balay           s[2] = b[i2 + 2];
12079371c9d4SSatish Balay           s[3] = b[i2 + 3];
12089371c9d4SSatish Balay           s[4] = b[i2 + 4];
12099371c9d4SSatish Balay           s[5] = b[i2 + 5];
12109371c9d4SSatish Balay           s[6] = b[i2 + 6];
1211e48d15efSToby Isaac           while (nz--) {
1212e48d15efSToby Isaac             idx   = 7 * (*vi++);
12139371c9d4SSatish Balay             xw[0] = x[idx];
12149371c9d4SSatish Balay             xw[1] = x[1 + idx];
12159371c9d4SSatish Balay             xw[2] = x[2 + idx];
12169371c9d4SSatish Balay             xw[3] = x[3 + idx];
12179371c9d4SSatish Balay             xw[4] = x[4 + idx];
12189371c9d4SSatish Balay             xw[5] = x[5 + idx];
12199371c9d4SSatish Balay             xw[6] = x[6 + idx];
1220e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1221e48d15efSToby Isaac             v += 49;
1222e48d15efSToby Isaac           }
1223e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
12249371c9d4SSatish Balay           x[i2] += xw[0];
12259371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12269371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12279371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12289371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12299371c9d4SSatish Balay           x[i2 + 5] += xw[5];
12309371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1231e48d15efSToby Isaac           idiag -= 49;
1232e48d15efSToby Isaac           i2 -= 7;
1233e48d15efSToby Isaac         }
1234e48d15efSToby Isaac         break;
1235e48d15efSToby Isaac       default:
1236e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1237e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1238e48d15efSToby Isaac           vi = aj + ai[i];
1239e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1240e48d15efSToby Isaac 
12419566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1242e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1243e48d15efSToby Isaac           workt = work;
1244e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
12459566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1246e48d15efSToby Isaac             workt += bs;
1247e48d15efSToby Isaac           }
1248e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1249e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1250e48d15efSToby Isaac 
1251e48d15efSToby Isaac           idiag -= bs2;
1252e48d15efSToby Isaac           i2 -= bs;
1253e48d15efSToby Isaac         }
1254e48d15efSToby Isaac         break;
1255e48d15efSToby Isaac       }
12569566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1257e48d15efSToby Isaac     }
1258e48d15efSToby Isaac   }
12599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
1261de80f912SBarry Smith   PetscFunctionReturn(0);
1262de80f912SBarry Smith }
1263de80f912SBarry Smith 
1264af674e45SBarry Smith /*
126581824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1266af674e45SBarry Smith */
1267af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1268af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1269af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1270af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4
1271af674e45SBarry Smith #endif
1272af674e45SBarry Smith 
12739371c9d4SSatish Balay PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) {
1274af674e45SBarry Smith   Mat                A = *AA;
1275af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1276c1ac3661SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1277c1ac3661SBarry Smith   PetscInt          *ai = a->i, *ailen = a->ilen;
127817ec6a02SBarry Smith   PetscInt          *aj = a->j, stepval, lastcol = -1;
1279f15d580aSBarry Smith   const PetscScalar *value = v;
12804bb09213Spetsc   MatScalar         *ap, *aa = a->a, *bap;
1281af674e45SBarry Smith 
1282af674e45SBarry Smith   PetscFunctionBegin;
1283ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1284af674e45SBarry Smith   stepval = (n - 1) * 4;
1285af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
1286af674e45SBarry Smith     row  = im[k];
1287af674e45SBarry Smith     rp   = aj + ai[row];
1288af674e45SBarry Smith     ap   = aa + 16 * ai[row];
1289af674e45SBarry Smith     nrow = ailen[row];
1290af674e45SBarry Smith     low  = 0;
129117ec6a02SBarry Smith     high = nrow;
1292af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
1293af674e45SBarry Smith       col = in[l];
1294db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1295db4deed7SKarl Rupp       else high = nrow;
129617ec6a02SBarry Smith       lastcol = col;
12971e3347e8SBarry Smith       value   = v + k * (stepval + 4 + l) * 4;
1298af674e45SBarry Smith       while (high - low > 7) {
1299af674e45SBarry Smith         t = (low + high) / 2;
1300af674e45SBarry Smith         if (rp[t] > col) high = t;
1301af674e45SBarry Smith         else low = t;
1302af674e45SBarry Smith       }
1303af674e45SBarry Smith       for (i = low; i < high; i++) {
1304af674e45SBarry Smith         if (rp[i] > col) break;
1305af674e45SBarry Smith         if (rp[i] == col) {
1306af674e45SBarry Smith           bap = ap + 16 * i;
1307af674e45SBarry Smith           for (ii = 0; ii < 4; ii++, value += stepval) {
1308ad540459SPierre Jolivet             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1309af674e45SBarry Smith           }
1310af674e45SBarry Smith           goto noinsert2;
1311af674e45SBarry Smith         }
1312af674e45SBarry Smith       }
1313af674e45SBarry Smith       N = nrow++ - 1;
131417ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
1315af674e45SBarry Smith       /* shift up all the later entries in this row */
1316af674e45SBarry Smith       for (ii = N; ii >= i; ii--) {
1317af674e45SBarry Smith         rp[ii + 1] = rp[ii];
13189566063dSJacob Faibussowitsch         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1319af674e45SBarry Smith       }
132048a46eb9SPierre Jolivet       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1321af674e45SBarry Smith       rp[i] = col;
1322af674e45SBarry Smith       bap   = ap + 16 * i;
1323af674e45SBarry Smith       for (ii = 0; ii < 4; ii++, value += stepval) {
1324ad540459SPierre Jolivet         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1325af674e45SBarry Smith       }
1326af674e45SBarry Smith     noinsert2:;
1327af674e45SBarry Smith       low = i;
1328af674e45SBarry Smith     }
1329af674e45SBarry Smith     ailen[row] = nrow;
1330af674e45SBarry Smith   }
1331be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1332af674e45SBarry Smith }
1333af674e45SBarry Smith 
1334af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1335af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4
1336af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1337af674e45SBarry Smith #define matsetvalues4_ matsetvalues4
1338af674e45SBarry Smith #endif
1339af674e45SBarry Smith 
13409371c9d4SSatish Balay PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) {
1341af674e45SBarry Smith   Mat          A = *AA;
1342af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1343580bdb30SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1344c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
1345c1ac3661SBarry Smith   PetscInt    *aj = a->j, brow, bcol;
134617ec6a02SBarry Smith   PetscInt     ridx, cidx, lastcol = -1;
1347af674e45SBarry Smith   MatScalar   *ap, value, *aa      = a->a, *bap;
1348af674e45SBarry Smith 
1349af674e45SBarry Smith   PetscFunctionBegin;
1350af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
13519371c9d4SSatish Balay     row  = im[k];
13529371c9d4SSatish Balay     brow = row / 4;
1353af674e45SBarry Smith     rp   = aj + ai[brow];
1354af674e45SBarry Smith     ap   = aa + 16 * ai[brow];
1355af674e45SBarry Smith     nrow = ailen[brow];
1356af674e45SBarry Smith     low  = 0;
135717ec6a02SBarry Smith     high = nrow;
1358af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
13599371c9d4SSatish Balay       col   = in[l];
13609371c9d4SSatish Balay       bcol  = col / 4;
13619371c9d4SSatish Balay       ridx  = row % 4;
13629371c9d4SSatish Balay       cidx  = col % 4;
1363af674e45SBarry Smith       value = v[l + k * n];
1364db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1365db4deed7SKarl Rupp       else high = nrow;
136617ec6a02SBarry Smith       lastcol = col;
1367af674e45SBarry Smith       while (high - low > 7) {
1368af674e45SBarry Smith         t = (low + high) / 2;
1369af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1370af674e45SBarry Smith         else low = t;
1371af674e45SBarry Smith       }
1372af674e45SBarry Smith       for (i = low; i < high; i++) {
1373af674e45SBarry Smith         if (rp[i] > bcol) break;
1374af674e45SBarry Smith         if (rp[i] == bcol) {
1375af674e45SBarry Smith           bap = ap + 16 * i + 4 * cidx + ridx;
1376af674e45SBarry Smith           *bap += value;
1377af674e45SBarry Smith           goto noinsert1;
1378af674e45SBarry Smith         }
1379af674e45SBarry Smith       }
1380af674e45SBarry Smith       N = nrow++ - 1;
138117ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1382af674e45SBarry Smith       /* shift up all the later entries in this row */
13839566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
13849566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
13859566063dSJacob Faibussowitsch       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1386af674e45SBarry Smith       rp[i]                        = bcol;
1387af674e45SBarry Smith       ap[16 * i + 4 * cidx + ridx] = value;
1388af674e45SBarry Smith     noinsert1:;
1389af674e45SBarry Smith       low = i;
1390af674e45SBarry Smith     }
1391af674e45SBarry Smith     ailen[brow] = nrow;
1392af674e45SBarry Smith   }
1393be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1394af674e45SBarry Smith }
1395af674e45SBarry Smith 
1396be5855fcSBarry Smith /*
1397be5855fcSBarry Smith      Checks for missing diagonals
1398be5855fcSBarry Smith */
13999371c9d4SSatish Balay PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) {
1400be5855fcSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14017734d3b5SMatthew G. Knepley   PetscInt    *diag, *ii = a->i, i;
1402be5855fcSBarry Smith 
1403be5855fcSBarry Smith   PetscFunctionBegin;
14049566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
14052af78befSBarry Smith   *missing = PETSC_FALSE;
14067734d3b5SMatthew G. Knepley   if (A->rmap->n > 0 && !ii) {
14072efa7f71SHong Zhang     *missing = PETSC_TRUE;
14082efa7f71SHong Zhang     if (d) *d = 0;
14099566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
14102efa7f71SHong Zhang   } else {
141101445905SHong Zhang     PetscInt n;
141201445905SHong Zhang     n    = PetscMin(a->mbs, a->nbs);
1413883fce79SBarry Smith     diag = a->diag;
141401445905SHong Zhang     for (i = 0; i < n; i++) {
14157734d3b5SMatthew G. Knepley       if (diag[i] >= ii[i + 1]) {
14162af78befSBarry Smith         *missing = PETSC_TRUE;
14172af78befSBarry Smith         if (d) *d = i;
14189566063dSJacob Faibussowitsch         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1419358d2f5dSShri Abhyankar         break;
14202efa7f71SHong Zhang       }
1421be5855fcSBarry Smith     }
1422be5855fcSBarry Smith   }
1423be5855fcSBarry Smith   PetscFunctionReturn(0);
1424be5855fcSBarry Smith }
1425be5855fcSBarry Smith 
14269371c9d4SSatish Balay PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) {
1427de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
142809f38230SBarry Smith   PetscInt     i, j, m = a->mbs;
1429de6a44a3SBarry Smith 
14303a40ed3dSBarry Smith   PetscFunctionBegin;
143109f38230SBarry Smith   if (!a->diag) {
14329566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(m, &a->diag));
14334fd072dbSBarry Smith     a->free_diag = PETSC_TRUE;
143409f38230SBarry Smith   }
14357fc0212eSBarry Smith   for (i = 0; i < m; i++) {
143609f38230SBarry Smith     a->diag[i] = a->i[i + 1];
1437de6a44a3SBarry Smith     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1438de6a44a3SBarry Smith       if (a->j[j] == i) {
143909f38230SBarry Smith         a->diag[i] = j;
1440de6a44a3SBarry Smith         break;
1441de6a44a3SBarry Smith       }
1442de6a44a3SBarry Smith     }
1443de6a44a3SBarry Smith   }
14443a40ed3dSBarry Smith   PetscFunctionReturn(0);
1445de6a44a3SBarry Smith }
14462593348eSBarry Smith 
14479371c9d4SSatish Balay static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) {
14483b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14491a83f524SJed Brown   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
14501a83f524SJed Brown   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
14513b2fbd54SBarry Smith 
14523a40ed3dSBarry Smith   PetscFunctionBegin;
14533b2fbd54SBarry Smith   *nn = n;
14543a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
14553b2fbd54SBarry Smith   if (symmetric) {
14569566063dSJacob Faibussowitsch     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1457553b3c51SBarry Smith     nz = tia[n];
14583b2fbd54SBarry Smith   } else {
14599371c9d4SSatish Balay     tia = a->i;
14609371c9d4SSatish Balay     tja = a->j;
14613b2fbd54SBarry Smith   }
14623b2fbd54SBarry Smith 
1463ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1464ecc77c7aSBarry Smith     (*nn) *= bs;
14658f7157efSSatish Balay     /* malloc & create the natural set of indices */
14669566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((n + 1) * bs, ia));
14679985e31cSBarry Smith     if (n) {
14682462f5fdSStefano Zampini       (*ia)[0] = oshift;
1469ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
14709985e31cSBarry Smith     }
1471ecc77c7aSBarry Smith 
1472ecc77c7aSBarry Smith     for (i = 1; i < n; i++) {
1473ecc77c7aSBarry Smith       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1474ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
14758f7157efSSatish Balay     }
1476ad540459SPierre Jolivet     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1477ecc77c7aSBarry Smith 
14781a83f524SJed Brown     if (inja) {
14799566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz * bs * bs, ja));
14809985e31cSBarry Smith       cnt = 0;
14819985e31cSBarry Smith       for (i = 0; i < n; i++) {
14829985e31cSBarry Smith         for (j = 0; j < bs; j++) {
14839985e31cSBarry Smith           for (k = tia[i]; k < tia[i + 1]; k++) {
1484ad540459SPierre Jolivet             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
14859985e31cSBarry Smith           }
14869985e31cSBarry Smith         }
14879985e31cSBarry Smith       }
14889985e31cSBarry Smith     }
14899985e31cSBarry Smith 
14908f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
14919566063dSJacob Faibussowitsch       PetscCall(PetscFree(tia));
14929566063dSJacob Faibussowitsch       PetscCall(PetscFree(tja));
14938f7157efSSatish Balay     }
1494f6d58c54SBarry Smith   } else if (oshift == 1) {
1495715a17b5SBarry Smith     if (symmetric) {
1496a2ea699eSBarry Smith       nz = tia[A->rmap->n / bs];
1497715a17b5SBarry Smith       /*  add 1 to i and j indices */
1498715a17b5SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1499715a17b5SBarry Smith       *ia = tia;
1500715a17b5SBarry Smith       if (ja) {
1501715a17b5SBarry Smith         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1502715a17b5SBarry Smith         *ja = tja;
1503715a17b5SBarry Smith       }
1504715a17b5SBarry Smith     } else {
1505a2ea699eSBarry Smith       nz = a->i[A->rmap->n / bs];
1506f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
15079566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1508f6d58c54SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1509f6d58c54SBarry Smith       if (ja) {
15109566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(nz, ja));
1511f6d58c54SBarry Smith         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1512f6d58c54SBarry Smith       }
1513715a17b5SBarry Smith     }
15148f7157efSSatish Balay   } else {
15158f7157efSSatish Balay     *ia = tia;
1516ecc77c7aSBarry Smith     if (ja) *ja = tja;
15178f7157efSSatish Balay   }
15183a40ed3dSBarry Smith   PetscFunctionReturn(0);
15193b2fbd54SBarry Smith }
15203b2fbd54SBarry Smith 
15219371c9d4SSatish Balay static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
15223a40ed3dSBarry Smith   PetscFunctionBegin;
15233a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
1524715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
15259566063dSJacob Faibussowitsch     PetscCall(PetscFree(*ia));
15269566063dSJacob Faibussowitsch     if (ja) PetscCall(PetscFree(*ja));
15273b2fbd54SBarry Smith   }
15283a40ed3dSBarry Smith   PetscFunctionReturn(0);
15293b2fbd54SBarry Smith }
15303b2fbd54SBarry Smith 
15319371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqBAIJ(Mat A) {
15322d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15332d61bbb3SSatish Balay 
1534433994e6SBarry Smith   PetscFunctionBegin;
1535aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1536c0aa6a63SJacob Faibussowitsch   PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz);
15372d61bbb3SSatish Balay #endif
15389566063dSJacob Faibussowitsch   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
15399566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
15409566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
15419566063dSJacob Faibussowitsch   if (a->free_diag) PetscCall(PetscFree(a->diag));
15429566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->idiag));
15439566063dSJacob Faibussowitsch   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
15449566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->solve_work));
15459566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->mult_work));
15469566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_workt));
15479566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_work));
15489566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
15499566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->saved_values));
15509566063dSJacob Faibussowitsch   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1551c4319e64SHong Zhang 
15529566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->sbaijMat));
15539566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->parent));
15549566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1555901853e0SKris Buschelman 
15569566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
15579566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
15589566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
15599566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
15609566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
15619566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
15629566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
15639566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
15649566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
15659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
15669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
15679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
15687ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
15699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
15707ea3e4caSstefano_zampini #endif
15719566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
15722e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
15732d61bbb3SSatish Balay   PetscFunctionReturn(0);
15742d61bbb3SSatish Balay }
15752d61bbb3SSatish Balay 
15769371c9d4SSatish Balay PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) {
15772d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15782d61bbb3SSatish Balay 
15792d61bbb3SSatish Balay   PetscFunctionBegin;
1580aa275fccSKris Buschelman   switch (op) {
15819371c9d4SSatish Balay   case MAT_ROW_ORIENTED: a->roworiented = flg; break;
15829371c9d4SSatish Balay   case MAT_KEEP_NONZERO_PATTERN: a->keepnonzeropattern = flg; break;
15839371c9d4SSatish Balay   case MAT_NEW_NONZERO_LOCATIONS: a->nonew = (flg ? 0 : 1); break;
15849371c9d4SSatish Balay   case MAT_NEW_NONZERO_LOCATION_ERR: a->nonew = (flg ? -1 : 0); break;
15859371c9d4SSatish Balay   case MAT_NEW_NONZERO_ALLOCATION_ERR: a->nonew = (flg ? -2 : 0); break;
15869371c9d4SSatish Balay   case MAT_UNUSED_NONZERO_LOCATION_ERR: a->nounused = (flg ? -1 : 0); break;
15878c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1588aa275fccSKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
1589aa275fccSKris Buschelman   case MAT_USE_HASH_TABLE:
15909371c9d4SSatish Balay   case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
15915021d80fSJed Brown   case MAT_SPD:
159277e54ba9SKris Buschelman   case MAT_SYMMETRIC:
159377e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
15949a4540c5SBarry Smith   case MAT_HERMITIAN:
15959a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1596b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1597c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1598672ba085SHong Zhang   case MAT_STRUCTURE_ONLY:
1599b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
1600b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
160177e54ba9SKris Buschelman     break;
16029371c9d4SSatish Balay   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
16032d61bbb3SSatish Balay   }
16042d61bbb3SSatish Balay   PetscFunctionReturn(0);
16052d61bbb3SSatish Balay }
16062d61bbb3SSatish Balay 
160752768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
16089371c9d4SSatish Balay PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) {
160952768537SHong Zhang   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
161052768537SHong Zhang   MatScalar   *aa_i;
161187828ca2SBarry Smith   PetscScalar *v_i;
16122d61bbb3SSatish Balay 
16132d61bbb3SSatish Balay   PetscFunctionBegin;
1614d0f46423SBarry Smith   bs  = A->rmap->bs;
161552768537SHong Zhang   bs2 = bs * bs;
16165f80ce2aSJacob Faibussowitsch   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
16172d61bbb3SSatish Balay 
16182d61bbb3SSatish Balay   bn  = row / bs; /* Block number */
16192d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
16202d61bbb3SSatish Balay   M   = ai[bn + 1] - ai[bn];
16212d61bbb3SSatish Balay   *nz = bs * M;
16222d61bbb3SSatish Balay 
16232d61bbb3SSatish Balay   if (v) {
1624f4259b30SLisandro Dalcin     *v = NULL;
16252d61bbb3SSatish Balay     if (*nz) {
16269566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, v));
16272d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16282d61bbb3SSatish Balay         v_i  = *v + i * bs;
16292d61bbb3SSatish Balay         aa_i = aa + bs2 * (ai[bn] + i);
163026fbe8dcSKarl Rupp         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
16312d61bbb3SSatish Balay       }
16322d61bbb3SSatish Balay     }
16332d61bbb3SSatish Balay   }
16342d61bbb3SSatish Balay 
16352d61bbb3SSatish Balay   if (idx) {
1636f4259b30SLisandro Dalcin     *idx = NULL;
16372d61bbb3SSatish Balay     if (*nz) {
16389566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, idx));
16392d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16402d61bbb3SSatish Balay         idx_i = *idx + i * bs;
16412d61bbb3SSatish Balay         itmp  = bs * aj[ai[bn] + i];
164226fbe8dcSKarl Rupp         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
16432d61bbb3SSatish Balay       }
16442d61bbb3SSatish Balay     }
16452d61bbb3SSatish Balay   }
16462d61bbb3SSatish Balay   PetscFunctionReturn(0);
16472d61bbb3SSatish Balay }
16482d61bbb3SSatish Balay 
16499371c9d4SSatish Balay PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
165052768537SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
165152768537SHong Zhang 
165252768537SHong Zhang   PetscFunctionBegin;
16539566063dSJacob Faibussowitsch   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
165452768537SHong Zhang   PetscFunctionReturn(0);
165552768537SHong Zhang }
165652768537SHong Zhang 
16579371c9d4SSatish Balay PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
16582d61bbb3SSatish Balay   PetscFunctionBegin;
1659cb4a9cd9SHong Zhang   if (nz) *nz = 0;
16609566063dSJacob Faibussowitsch   if (idx) PetscCall(PetscFree(*idx));
16619566063dSJacob Faibussowitsch   if (v) PetscCall(PetscFree(*v));
16622d61bbb3SSatish Balay   PetscFunctionReturn(0);
16632d61bbb3SSatish Balay }
16642d61bbb3SSatish Balay 
16659371c9d4SSatish Balay PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) {
166620e84f26SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
16672d61bbb3SSatish Balay   Mat          C;
166820e84f26SHong Zhang   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
166920e84f26SHong Zhang   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
167020e84f26SHong Zhang   MatScalar   *ata, *aa = a->a;
16712d61bbb3SSatish Balay 
16722d61bbb3SSatish Balay   PetscFunctionBegin;
16737fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
16749566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1675cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
167620e84f26SHong Zhang     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
16772d61bbb3SSatish Balay 
16789566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
16799566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
16809566063dSJacob Faibussowitsch     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
16819566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
168220e84f26SHong Zhang 
168320e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
168420e84f26SHong Zhang     ati = at->i;
168520e84f26SHong Zhang     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1686fc4dec0aSBarry Smith   } else {
1687fc4dec0aSBarry Smith     C   = *B;
168820e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
168920e84f26SHong Zhang     ati = at->i;
1690fc4dec0aSBarry Smith   }
1691fc4dec0aSBarry Smith 
169220e84f26SHong Zhang   atj = at->j;
169320e84f26SHong Zhang   ata = at->a;
169420e84f26SHong Zhang 
169520e84f26SHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
16969566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(atfill, ati, nbs));
169720e84f26SHong Zhang 
169820e84f26SHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
16992d61bbb3SSatish Balay   for (i = 0; i < mbs; i++) {
170020e84f26SHong Zhang     anzj = ai[i + 1] - ai[i];
170120e84f26SHong Zhang     for (j = 0; j < anzj; j++) {
170220e84f26SHong Zhang       atj[atfill[*aj]] = i;
170320e84f26SHong Zhang       for (kr = 0; kr < bs; kr++) {
1704ad540459SPierre Jolivet         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
17052d61bbb3SSatish Balay       }
170620e84f26SHong Zhang       atfill[*aj++] += 1;
170720e84f26SHong Zhang     }
170820e84f26SHong Zhang   }
17099566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
17109566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
17112d61bbb3SSatish Balay 
171220e84f26SHong Zhang   /* Clean up temporary space and complete requests. */
17139566063dSJacob Faibussowitsch   PetscCall(PetscFree(atfill));
171420e84f26SHong Zhang 
1715cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
17169566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
17172d61bbb3SSatish Balay     *B = C;
17182d61bbb3SSatish Balay   } else {
17199566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &C));
17202d61bbb3SSatish Balay   }
17212d61bbb3SSatish Balay   PetscFunctionReturn(0);
17222d61bbb3SSatish Balay }
17232d61bbb3SSatish Balay 
17249371c9d4SSatish Balay PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) {
1725453d3561SHong Zhang   Mat Btrans;
1726453d3561SHong Zhang 
1727453d3561SHong Zhang   PetscFunctionBegin;
1728453d3561SHong Zhang   *f = PETSC_FALSE;
1729acd337a6SBarry Smith   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
17309566063dSJacob Faibussowitsch   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
17319566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Btrans));
1732453d3561SHong Zhang   PetscFunctionReturn(0);
1733453d3561SHong Zhang }
1734453d3561SHong Zhang 
1735618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
17369371c9d4SSatish Balay PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) {
1737b51a4376SLisandro Dalcin   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1738b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1739b51a4376SLisandro Dalcin   PetscInt    *rowlens, *colidxs;
1740b51a4376SLisandro Dalcin   PetscScalar *matvals;
17412593348eSBarry Smith 
17423a40ed3dSBarry Smith   PetscFunctionBegin;
17439566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
17443b2fbd54SBarry Smith 
1745b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1746b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1747b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1748b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1749b51a4376SLisandro Dalcin   nz = bs * bs * A->nz;
17502593348eSBarry Smith 
1751b51a4376SLisandro Dalcin   /* write matrix header */
1752b51a4376SLisandro Dalcin   header[0] = MAT_FILE_CLASSID;
17539371c9d4SSatish Balay   header[1] = M;
17549371c9d4SSatish Balay   header[2] = N;
17559371c9d4SSatish Balay   header[3] = nz;
17569566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
17572593348eSBarry Smith 
1758b51a4376SLisandro Dalcin   /* store row lengths */
17599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rowlens));
1760b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
17619371c9d4SSatish Balay     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
17629566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
17639566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
1764b51a4376SLisandro Dalcin 
1765b51a4376SLisandro Dalcin   /* store column indices  */
17669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &colidxs));
1767b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1768b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1769b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
17709371c9d4SSatish Balay         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
17715f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
17729566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
17739566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
17742593348eSBarry Smith 
17752593348eSBarry Smith   /* store nonzero values */
17769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &matvals));
1777b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1778b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1779b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
17809371c9d4SSatish Balay         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
17815f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
17829566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
17839566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
1784ce6f0cecSBarry Smith 
1785b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
17869566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
17873a40ed3dSBarry Smith   PetscFunctionReturn(0);
17882593348eSBarry Smith }
17892593348eSBarry Smith 
17909371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) {
17917dc0baabSHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
17927dc0baabSHong Zhang   PetscInt     i, bs = A->rmap->bs, k;
17937dc0baabSHong Zhang 
17947dc0baabSHong Zhang   PetscFunctionBegin;
17959566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
17967dc0baabSHong Zhang   for (i = 0; i < a->mbs; i++) {
17979566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
179848a46eb9SPierre Jolivet     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
17999566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18007dc0baabSHong Zhang   }
18019566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18027dc0baabSHong Zhang   PetscFunctionReturn(0);
18037dc0baabSHong Zhang }
18047dc0baabSHong Zhang 
18059371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) {
1806b6490206SBarry Smith   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1807d0f46423SBarry Smith   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1808f3ef73ceSBarry Smith   PetscViewerFormat format;
18092593348eSBarry Smith 
18103a40ed3dSBarry Smith   PetscFunctionBegin;
18117dc0baabSHong Zhang   if (A->structure_only) {
18129566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
18137dc0baabSHong Zhang     PetscFunctionReturn(0);
18147dc0baabSHong Zhang   }
18157dc0baabSHong Zhang 
18169566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
1817456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
18189566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1819fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1820ade3a672SBarry Smith     const char *matname;
1821bcd9e38bSBarry Smith     Mat         aij;
18229566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
18239566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
18249566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
18259566063dSJacob Faibussowitsch     PetscCall(MatView(aij, viewer));
18269566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&aij));
182704929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
182804929863SHong Zhang     PetscFunctionReturn(0);
1829fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
18309566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
183144cd7ae7SLois Curfman McInnes     for (i = 0; i < a->mbs; i++) {
183244cd7ae7SLois Curfman McInnes       for (j = 0; j < bs; j++) {
18339566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
183444cd7ae7SLois Curfman McInnes         for (k = a->i[i]; k < a->i[i + 1]; k++) {
183544cd7ae7SLois Curfman McInnes           for (l = 0; l < bs; l++) {
1836aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18370e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18389371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18390e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18409371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18410e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18429566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
18430ef38995SBarry Smith             }
184444cd7ae7SLois Curfman McInnes #else
184548a46eb9SPierre Jolivet             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
184644cd7ae7SLois Curfman McInnes #endif
184744cd7ae7SLois Curfman McInnes           }
184844cd7ae7SLois Curfman McInnes         }
18499566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
185044cd7ae7SLois Curfman McInnes       }
185144cd7ae7SLois Curfman McInnes     }
18529566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18530ef38995SBarry Smith   } else {
18549566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1855b6490206SBarry Smith     for (i = 0; i < a->mbs; i++) {
1856b6490206SBarry Smith       for (j = 0; j < bs; j++) {
18579566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1858b6490206SBarry Smith         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1859b6490206SBarry Smith           for (l = 0; l < bs; l++) {
1860aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18610e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
18629371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18630e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
18649371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18650ef38995SBarry Smith             } else {
18669566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
186788685aaeSLois Curfman McInnes             }
186888685aaeSLois Curfman McInnes #else
18699566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
187088685aaeSLois Curfman McInnes #endif
18712593348eSBarry Smith           }
18722593348eSBarry Smith         }
18739566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18742593348eSBarry Smith       }
18752593348eSBarry Smith     }
18769566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1877b6490206SBarry Smith   }
18789566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
18793a40ed3dSBarry Smith   PetscFunctionReturn(0);
18802593348eSBarry Smith }
18812593348eSBarry Smith 
18829804daf3SBarry Smith #include <petscdraw.h>
18839371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) {
188477ed5343SBarry Smith   Mat               A = (Mat)Aa;
18853270192aSSatish Balay   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1886d0f46423SBarry Smith   PetscInt          row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2;
18870e6d2581SBarry Smith   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
18883f1db9ecSBarry Smith   MatScalar        *aa;
1889b0a32e0cSBarry Smith   PetscViewer       viewer;
1890b3e7f47fSJed Brown   PetscViewerFormat format;
18913270192aSSatish Balay 
18923a40ed3dSBarry Smith   PetscFunctionBegin;
18939566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
18949566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
18959566063dSJacob Faibussowitsch   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
189677ed5343SBarry Smith 
18973270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1898b3e7f47fSJed Brown 
1899b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1900d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1901383922c3SLisandro Dalcin     /* Blue for negative, Cyan for zero and  Red for positive */
1902b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
19033270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19043270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19059371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19069371c9d4SSatish Balay         y_r = y_l + 1.0;
19079371c9d4SSatish Balay         x_l = a->j[j] * bs;
19089371c9d4SSatish Balay         x_r = x_l + 1.0;
19093270192aSSatish Balay         aa  = a->a + j * bs2;
19103270192aSSatish Balay         for (k = 0; k < bs; k++) {
19113270192aSSatish Balay           for (l = 0; l < bs; l++) {
19120e6d2581SBarry Smith             if (PetscRealPart(*aa++) >= 0.) continue;
19139566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19143270192aSSatish Balay           }
19153270192aSSatish Balay         }
19163270192aSSatish Balay       }
19173270192aSSatish Balay     }
1918b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
19193270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19203270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19219371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19229371c9d4SSatish Balay         y_r = y_l + 1.0;
19239371c9d4SSatish Balay         x_l = a->j[j] * bs;
19249371c9d4SSatish Balay         x_r = x_l + 1.0;
19253270192aSSatish Balay         aa  = a->a + j * bs2;
19263270192aSSatish Balay         for (k = 0; k < bs; k++) {
19273270192aSSatish Balay           for (l = 0; l < bs; l++) {
19280e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
19299566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19303270192aSSatish Balay           }
19313270192aSSatish Balay         }
19323270192aSSatish Balay       }
19333270192aSSatish Balay     }
1934b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
19353270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19363270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19379371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19389371c9d4SSatish Balay         y_r = y_l + 1.0;
19399371c9d4SSatish Balay         x_l = a->j[j] * bs;
19409371c9d4SSatish Balay         x_r = x_l + 1.0;
19413270192aSSatish Balay         aa  = a->a + j * bs2;
19423270192aSSatish Balay         for (k = 0; k < bs; k++) {
19433270192aSSatish Balay           for (l = 0; l < bs; l++) {
19440e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
19459566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19463270192aSSatish Balay           }
19473270192aSSatish Balay         }
19483270192aSSatish Balay       }
19493270192aSSatish Balay     }
1950d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
1951b3e7f47fSJed Brown   } else {
1952b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
1953b3e7f47fSJed Brown     /* first determine max of all nonzero values */
1954b05fc000SLisandro Dalcin     PetscReal minv = 0.0, maxv = 0.0;
1955b3e7f47fSJed Brown     PetscDraw popup;
1956b3e7f47fSJed Brown 
1957b3e7f47fSJed Brown     for (i = 0; i < a->nz * a->bs2; i++) {
1958b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1959b3e7f47fSJed Brown     }
1960383922c3SLisandro Dalcin     if (minv >= maxv) maxv = minv + PETSC_SMALL;
19619566063dSJacob Faibussowitsch     PetscCall(PetscDrawGetPopup(draw, &popup));
19629566063dSJacob Faibussowitsch     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1963383922c3SLisandro Dalcin 
1964d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1965b3e7f47fSJed Brown     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1966b3e7f47fSJed Brown       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19679371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19689371c9d4SSatish Balay         y_r = y_l + 1.0;
19699371c9d4SSatish Balay         x_l = a->j[j] * bs;
19709371c9d4SSatish Balay         x_r = x_l + 1.0;
1971b3e7f47fSJed Brown         aa  = a->a + j * bs2;
1972b3e7f47fSJed Brown         for (k = 0; k < bs; k++) {
1973b3e7f47fSJed Brown           for (l = 0; l < bs; l++) {
1974383922c3SLisandro Dalcin             MatScalar v = *aa++;
1975383922c3SLisandro Dalcin             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
19769566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1977b3e7f47fSJed Brown           }
1978b3e7f47fSJed Brown         }
1979b3e7f47fSJed Brown       }
1980b3e7f47fSJed Brown     }
1981d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
1982b3e7f47fSJed Brown   }
198377ed5343SBarry Smith   PetscFunctionReturn(0);
198477ed5343SBarry Smith }
19853270192aSSatish Balay 
19869371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) {
19870e6d2581SBarry Smith   PetscReal xl, yl, xr, yr, w, h;
1988b0a32e0cSBarry Smith   PetscDraw draw;
1989ace3abfcSBarry Smith   PetscBool isnull;
19903270192aSSatish Balay 
199177ed5343SBarry Smith   PetscFunctionBegin;
19929566063dSJacob Faibussowitsch   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
19939566063dSJacob Faibussowitsch   PetscCall(PetscDrawIsNull(draw, &isnull));
199445f3bb6eSLisandro Dalcin   if (isnull) PetscFunctionReturn(0);
199577ed5343SBarry Smith 
19969371c9d4SSatish Balay   xr = A->cmap->n;
19979371c9d4SSatish Balay   yr = A->rmap->N;
19989371c9d4SSatish Balay   h  = yr / 10.0;
19999371c9d4SSatish Balay   w  = xr / 10.0;
20009371c9d4SSatish Balay   xr += w;
20019371c9d4SSatish Balay   yr += h;
20029371c9d4SSatish Balay   xl = -w;
20039371c9d4SSatish Balay   yl = -h;
20049566063dSJacob Faibussowitsch   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
20059566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
20069566063dSJacob Faibussowitsch   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
20079566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
20089566063dSJacob Faibussowitsch   PetscCall(PetscDrawSave(draw));
20093a40ed3dSBarry Smith   PetscFunctionReturn(0);
20103270192aSSatish Balay }
20113270192aSSatish Balay 
20129371c9d4SSatish Balay PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) {
2013ace3abfcSBarry Smith   PetscBool iascii, isbinary, isdraw;
20142593348eSBarry Smith 
20153a40ed3dSBarry Smith   PetscFunctionBegin;
20169566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
20179566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
20189566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
201932077d6dSBarry Smith   if (iascii) {
20209566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
20210f5bd95cSBarry Smith   } else if (isbinary) {
20229566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
20230f5bd95cSBarry Smith   } else if (isdraw) {
20249566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
20255cd90555SBarry Smith   } else {
2026a5e6ed63SBarry Smith     Mat B;
20279566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
20289566063dSJacob Faibussowitsch     PetscCall(MatView(B, viewer));
20299566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
20302593348eSBarry Smith   }
20313a40ed3dSBarry Smith   PetscFunctionReturn(0);
20322593348eSBarry Smith }
2033b6490206SBarry Smith 
20349371c9d4SSatish Balay PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) {
2035cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2036c1ac3661SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2037c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
2038d0f46423SBarry Smith   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
203997e567efSBarry Smith   MatScalar   *ap, *aa = a->a;
2040cd0e1443SSatish Balay 
20413a40ed3dSBarry Smith   PetscFunctionBegin;
20422d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over rows */
20439371c9d4SSatish Balay     row  = im[k];
20449371c9d4SSatish Balay     brow = row / bs;
20459371c9d4SSatish Balay     if (row < 0) {
20469371c9d4SSatish Balay       v += n;
20479371c9d4SSatish Balay       continue;
20489371c9d4SSatish Balay     } /* negative row */
204954c59aa7SJacob Faibussowitsch     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2050d29f2997SMatthew Woehlke     rp   = aj ? aj + ai[brow] : NULL;       /* mustn't add to NULL, that is UB */
2051d29f2997SMatthew Woehlke     ap   = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */
20522c3acbe9SBarry Smith     nrow = ailen[brow];
20532d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over columns */
20549371c9d4SSatish Balay       if (in[l] < 0) {
20559371c9d4SSatish Balay         v++;
20569371c9d4SSatish Balay         continue;
20579371c9d4SSatish Balay       } /* negative column */
205854c59aa7SJacob Faibussowitsch       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
20592d61bbb3SSatish Balay       col  = in[l];
20602d61bbb3SSatish Balay       bcol = col / bs;
20612d61bbb3SSatish Balay       cidx = col % bs;
20622d61bbb3SSatish Balay       ridx = row % bs;
20632d61bbb3SSatish Balay       high = nrow;
20642d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
20652d61bbb3SSatish Balay       while (high - low > 5) {
2066cd0e1443SSatish Balay         t = (low + high) / 2;
2067cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
2068cd0e1443SSatish Balay         else low = t;
2069cd0e1443SSatish Balay       }
2070cd0e1443SSatish Balay       for (i = low; i < high; i++) {
2071cd0e1443SSatish Balay         if (rp[i] > bcol) break;
2072cd0e1443SSatish Balay         if (rp[i] == bcol) {
20732d61bbb3SSatish Balay           *v++ = ap[bs2 * i + bs * cidx + ridx];
20742d61bbb3SSatish Balay           goto finished;
2075cd0e1443SSatish Balay         }
2076cd0e1443SSatish Balay       }
207797e567efSBarry Smith       *v++ = 0.0;
20782d61bbb3SSatish Balay     finished:;
2079cd0e1443SSatish Balay     }
2080cd0e1443SSatish Balay   }
20813a40ed3dSBarry Smith   PetscFunctionReturn(0);
2082cd0e1443SSatish Balay }
2083cd0e1443SSatish Balay 
/*
  MatSetValuesBlocked_SeqBAIJ - Inserts or adds an m-by-n array of bs-by-bs blocks.

  Input Parameters:
+ A  - the matrix
. m  - number of block rows
. im - block row indices; negative entries are skipped
. n  - number of block columns
. in - block column indices; negative entries are skipped
. v  - the values, read as an (m*bs)-by-(n*bs) array stored by rows when
       a->roworiented is set and by columns otherwise
- is - ADD_VALUES accumulates into existing blocks, any other mode overwrites them

  When A->structure_only is set only the nonzero pattern is updated and v is
  never read. A block that is not yet present is silently ignored if
  a->nonew == 1 and raises an error if a->nonew == -1; otherwise it is inserted
  so that the column indices of the row stay sorted.
*/
20849371c9d4SSatish Balay PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) {
208592c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2086e2ee6c50SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2087c1ac3661SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2088d0f46423SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2089ace3abfcSBarry Smith   PetscBool          roworiented = a->roworiented;
2090dd6ea824SBarry Smith   const PetscScalar *value       = v;
20919d243f67SHong Zhang   MatScalar         *ap = NULL, *aa = a->a, *bap;
209292c4ed94SBarry Smith 
20933a40ed3dSBarry Smith   PetscFunctionBegin;
  /* stepval: scalars to skip in v, after reading bs consecutive values of a block,
     to reach the next bs-long run belonging to the same block */
20940e324ae4SSatish Balay   if (roworiented) {
20950e324ae4SSatish Balay     stepval = (n - 1) * bs;
20960e324ae4SSatish Balay   } else {
20970e324ae4SSatish Balay     stepval = (m - 1) * bs;
20980e324ae4SSatish Balay   }
209992c4ed94SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
210092c4ed94SBarry Smith     row = im[k];
21015ef9f2a5SBarry Smith     if (row < 0) continue;
21026bdcaf15SBarry Smith     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
    /* rp / ap: column indices and values of this block row; rmax its capacity, nrow its current length */
210392c4ed94SBarry Smith     rp = aj + ai[row];
21047dc0baabSHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[row];
210592c4ed94SBarry Smith     rmax = imax[row];
210692c4ed94SBarry Smith     nrow = ailen[row];
210792c4ed94SBarry Smith     low  = 0;
2108c71e6ed7SBarry Smith     high = nrow;
210992c4ed94SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
21105ef9f2a5SBarry Smith       if (in[l] < 0) continue;
21116bdcaf15SBarry Smith       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
211292c4ed94SBarry Smith       col = in[l];
      /* locate the first scalar of block (k,l) inside v */
21137dc0baabSHong Zhang       if (!A->structure_only) {
211492c4ed94SBarry Smith         if (roworiented) {
211553ef36baSBarry Smith           value = v + (k * (stepval + bs) + l) * bs;
21160e324ae4SSatish Balay         } else {
211753ef36baSBarry Smith           value = v + (l * (stepval + bs) + k) * bs;
211892c4ed94SBarry Smith         }
21197dc0baabSHong Zhang       }
      /* low was left at the previous position (see noinsert2); keep it as the lower
         search bound only when this column is larger than the previous one */
212026fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
212126fbe8dcSKarl Rupp       else high = nrow;
2122e2ee6c50SBarry Smith       lastcol = col;
      /* bisect down to a window of at most 7 entries, then scan it linearly */
212392c4ed94SBarry Smith       while (high - low > 7) {
212492c4ed94SBarry Smith         t = (low + high) / 2;
212592c4ed94SBarry Smith         if (rp[t] > col) high = t;
212692c4ed94SBarry Smith         else low = t;
212792c4ed94SBarry Smith       }
212892c4ed94SBarry Smith       for (i = low; i < high; i++) {
212992c4ed94SBarry Smith         if (rp[i] > col) break;
213092c4ed94SBarry Smith         if (rp[i] == col) {
          /* block already present: nothing to do for the pattern, otherwise update its values */
21317dc0baabSHong Zhang           if (A->structure_only) goto noinsert2;
21328a84c255SSatish Balay           bap = ap + bs2 * i;
21330e324ae4SSatish Balay           if (roworiented) {
            /* input row ii is scattered with stride bs, i.e. the stored block is column-major */
21348a84c255SSatish Balay             if (is == ADD_VALUES) {
2135dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2136ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2137dd9472c6SBarry Smith               }
21380e324ae4SSatish Balay             } else {
2139dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2140ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2141dd9472c6SBarry Smith               }
2142dd9472c6SBarry Smith             }
21430e324ae4SSatish Balay           } else {
            /* column-oriented input already matches the storage order: copy bs values per column */
21440e324ae4SSatish Balay             if (is == ADD_VALUES) {
214553ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2146ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
214753ef36baSBarry Smith                 bap += bs;
2148dd9472c6SBarry Smith               }
21490e324ae4SSatish Balay             } else {
215053ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2151ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
215253ef36baSBarry Smith                 bap += bs;
21538a84c255SSatish Balay               }
2154dd9472c6SBarry Smith             }
2155dd9472c6SBarry Smith           }
2156f1241b54SBarry Smith           goto noinsert2;
215792c4ed94SBarry Smith         }
215892c4ed94SBarry Smith       }
      /* block (row,col) is new; i is the position at which it has to be inserted */
215989280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
21605f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      /* NOTE(review): the two reallocation macros are defined outside this file section;
         presumably they enlarge the storage when nrow >= rmax and may update the local
         pointers / rmax passed to them, and may use the local index ii -- confirm against
         their definition before renaming or reordering anything here */
21617dc0baabSHong Zhang       if (A->structure_only) {
21627dc0baabSHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
21637dc0baabSHong Zhang       } else {
2164fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
21657dc0baabSHong Zhang       }
      /* N: index of the last block that existed before this insertion; nrow now counts the new block */
21669371c9d4SSatish Balay       N = nrow++ - 1;
21679371c9d4SSatish Balay       high++;
216892c4ed94SBarry Smith       /* shift up all the later entries in this row */
21699566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
217092c4ed94SBarry Smith       rp[i] = col;
21717dc0baabSHong Zhang       if (!A->structure_only) {
21729566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
21738a84c255SSatish Balay         bap = ap + bs2 * i;
        /* a new block is always overwritten, whatever the insert mode */
21740e324ae4SSatish Balay         if (roworiented) {
2175dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2176ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2177dd9472c6SBarry Smith           }
21780e324ae4SSatish Balay         } else {
2179dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2180ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2181dd9472c6SBarry Smith           }
2182dd9472c6SBarry Smith         }
21837dc0baabSHong Zhang       }
2184f1241b54SBarry Smith     noinsert2:;
      /* remember where this column ended up so that a larger next column can start its search here */
218592c4ed94SBarry Smith       low = i;
218692c4ed94SBarry Smith     }
    /* publish the updated number of blocks stored in this block row */
218792c4ed94SBarry Smith     ailen[row] = nrow;
218892c4ed94SBarry Smith   }
21893a40ed3dSBarry Smith   PetscFunctionReturn(0);
219092c4ed94SBarry Smith }
219126e093fcSHong Zhang 
21929371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) {
2193584200bdSSatish Balay   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2194580bdb30SBarry Smith   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2195d0f46423SBarry Smith   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2196c1ac3661SBarry Smith   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
21973f1db9ecSBarry Smith   MatScalar   *aa    = a->a, *ap;
21983447b6efSHong Zhang   PetscReal    ratio = 0.6;
2199584200bdSSatish Balay 
22003a40ed3dSBarry Smith   PetscFunctionBegin;
22013a40ed3dSBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2202584200bdSSatish Balay 
220343ee02c3SBarry Smith   if (m) rmax = ailen[0];
2204584200bdSSatish Balay   for (i = 1; i < mbs; i++) {
2205584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
2206584200bdSSatish Balay     fshift += imax[i - 1] - ailen[i - 1];
2207d402145bSBarry Smith     rmax = PetscMax(rmax, ailen[i]);
2208584200bdSSatish Balay     if (fshift) {
2209580bdb30SBarry Smith       ip = aj + ai[i];
2210580bdb30SBarry Smith       ap = aa + bs2 * ai[i];
2211584200bdSSatish Balay       N  = ailen[i];
22129566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(ip - fshift, ip, N));
221348a46eb9SPierre Jolivet       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2214672ba085SHong Zhang     }
2215584200bdSSatish Balay     ai[i] = ai[i - 1] + ailen[i - 1];
2216584200bdSSatish Balay   }
2217584200bdSSatish Balay   if (mbs) {
2218584200bdSSatish Balay     fshift += imax[mbs - 1] - ailen[mbs - 1];
2219584200bdSSatish Balay     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2220584200bdSSatish Balay   }
22217c565772SBarry Smith 
2222584200bdSSatish Balay   /* reset ilen and imax for each row */
22237c565772SBarry Smith   a->nonzerorowcnt = 0;
2224672ba085SHong Zhang   if (A->structure_only) {
22259566063dSJacob Faibussowitsch     PetscCall(PetscFree2(a->imax, a->ilen));
2226672ba085SHong Zhang   } else { /* !A->structure_only */
2227584200bdSSatish Balay     for (i = 0; i < mbs; i++) {
2228584200bdSSatish Balay       ailen[i] = imax[i] = ai[i + 1] - ai[i];
22297c565772SBarry Smith       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2230584200bdSSatish Balay     }
2231672ba085SHong Zhang   }
2232a7c10996SSatish Balay   a->nz = ai[mbs];
2233584200bdSSatish Balay 
2234584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
2235b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
2236584200bdSSatish Balay   if (fshift && a->diag) {
22379566063dSJacob Faibussowitsch     PetscCall(PetscFree(a->diag));
2238f4259b30SLisandro Dalcin     a->diag = NULL;
2239584200bdSSatish Balay   }
22405f80ce2aSJacob Faibussowitsch   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
22419566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
22429566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
22439566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
224426fbe8dcSKarl Rupp 
22458e58a170SBarry Smith   A->info.mallocs += a->reallocs;
2246e2f3b5e9SSatish Balay   a->reallocs         = 0;
22470e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2248647a6520SHong Zhang   a->rmax             = rmax;
2249cf4441caSHong Zhang 
225048a46eb9SPierre Jolivet   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
22513a40ed3dSBarry Smith   PetscFunctionReturn(0);
2252584200bdSSatish Balay }
2253584200bdSSatish Balay 
2254bea157c4SSatish Balay /*
2255bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
2256bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2257a5b23f4aSJose E. Roman    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2258bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
2259bea157c4SSatish Balay */
22609371c9d4SSatish Balay static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) {
2261c1ac3661SBarry Smith   PetscInt  i, j, k, row;
2262ace3abfcSBarry Smith   PetscBool flg;
22633a40ed3dSBarry Smith 
2264433994e6SBarry Smith   PetscFunctionBegin;
2265bea157c4SSatish Balay   for (i = 0, j = 0; i < n; j++) {
2266bea157c4SSatish Balay     row = idx[i];
2267a5b23f4aSJose E. Roman     if (row % bs != 0) { /* Not the beginning of a block */
2268bea157c4SSatish Balay       sizes[j] = 1;
2269bea157c4SSatish Balay       i++;
2270e4fda26cSSatish Balay     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2271bea157c4SSatish Balay       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2272bea157c4SSatish Balay       i++;
22736aad120cSJose E. Roman     } else { /* Beginning of the block, so check if the complete block exists */
2274bea157c4SSatish Balay       flg = PETSC_TRUE;
2275bea157c4SSatish Balay       for (k = 1; k < bs; k++) {
2276bea157c4SSatish Balay         if (row + k != idx[i + k]) { /* break in the block */
2277bea157c4SSatish Balay           flg = PETSC_FALSE;
2278bea157c4SSatish Balay           break;
2279d9b7c43dSSatish Balay         }
2280bea157c4SSatish Balay       }
2281abc0a331SBarry Smith       if (flg) { /* No break in the bs */
2282bea157c4SSatish Balay         sizes[j] = bs;
2283bea157c4SSatish Balay         i += bs;
2284bea157c4SSatish Balay       } else {
2285bea157c4SSatish Balay         sizes[j] = 1;
2286bea157c4SSatish Balay         i++;
2287bea157c4SSatish Balay       }
2288bea157c4SSatish Balay     }
2289bea157c4SSatish Balay   }
2290bea157c4SSatish Balay   *bs_max = j;
22913a40ed3dSBarry Smith   PetscFunctionReturn(0);
2292d9b7c43dSSatish Balay }
2293d9b7c43dSSatish Balay 
22949371c9d4SSatish Balay PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) {
2295d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2296f4df32b1SMatthew Knepley   PetscInt           i, j, k, count, *rows;
2297d0f46423SBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
229887828ca2SBarry Smith   PetscScalar        zero = 0.0;
22993f1db9ecSBarry Smith   MatScalar         *aa;
230097b48c8fSBarry Smith   const PetscScalar *xx;
230197b48c8fSBarry Smith   PetscScalar       *bb;
2302d9b7c43dSSatish Balay 
23033a40ed3dSBarry Smith   PetscFunctionBegin;
230497b48c8fSBarry Smith   /* fix right hand side if needed */
230597b48c8fSBarry Smith   if (x && b) {
23069566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
23079566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
2308ad540459SPierre Jolivet     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
23099566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
23109566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
231197b48c8fSBarry Smith   }
231297b48c8fSBarry Smith 
2313d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2314bea157c4SSatish Balay   /* allocate memory for rows,sizes */
23159566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2316bea157c4SSatish Balay 
2317563b5814SBarry Smith   /* copy IS values to rows, and sort them */
231826fbe8dcSKarl Rupp   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
23199566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(is_n, rows));
232097b48c8fSBarry Smith 
2321a9817697SBarry Smith   if (baij->keepnonzeropattern) {
232226fbe8dcSKarl Rupp     for (i = 0; i < is_n; i++) sizes[i] = 1;
2323dffd3267SBarry Smith     bs_max = is_n;
2324dffd3267SBarry Smith   } else {
23259566063dSJacob Faibussowitsch     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2326e56f5c9eSBarry Smith     A->nonzerostate++;
2327dffd3267SBarry Smith   }
2328bea157c4SSatish Balay 
2329bea157c4SSatish Balay   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2330bea157c4SSatish Balay     row = rows[j];
23315f80ce2aSJacob Faibussowitsch     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2332bea157c4SSatish Balay     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2333b31fbe3bSSatish Balay     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2334a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2335d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2336bea157c4SSatish Balay         if (baij->ilen[row / bs] > 0) {
2337bea157c4SSatish Balay           baij->ilen[row / bs]       = 1;
2338bea157c4SSatish Balay           baij->j[baij->i[row / bs]] = row / bs;
233926fbe8dcSKarl Rupp 
23409566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(aa, count * bs));
2341a07cd24cSSatish Balay         }
2342563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
234348a46eb9SPierre Jolivet         for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES));
2344f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2345bea157c4SSatish Balay         baij->ilen[row / bs] = 0;
2346f4df32b1SMatthew Knepley       }      /* end (diag == 0.0) */
2347bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
23486bdcaf15SBarry Smith       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2349bea157c4SSatish Balay       for (k = 0; k < count; k++) {
2350d9b7c43dSSatish Balay         aa[0] = zero;
2351d9b7c43dSSatish Balay         aa += bs;
2352d9b7c43dSSatish Balay       }
235348a46eb9SPierre Jolivet       if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES));
2354d9b7c43dSSatish Balay     }
2355bea157c4SSatish Balay   }
2356bea157c4SSatish Balay 
23579566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, sizes));
23589566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
23593a40ed3dSBarry Smith   PetscFunctionReturn(0);
2360d9b7c43dSSatish Balay }
23611c351548SSatish Balay 
23629371c9d4SSatish Balay PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) {
236397b48c8fSBarry Smith   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
236497b48c8fSBarry Smith   PetscInt           i, j, k, count;
236597b48c8fSBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
236697b48c8fSBarry Smith   PetscScalar        zero = 0.0;
236797b48c8fSBarry Smith   MatScalar         *aa;
236897b48c8fSBarry Smith   const PetscScalar *xx;
236997b48c8fSBarry Smith   PetscScalar       *bb;
237056777dd2SBarry Smith   PetscBool         *zeroed, vecs = PETSC_FALSE;
237197b48c8fSBarry Smith 
237297b48c8fSBarry Smith   PetscFunctionBegin;
237397b48c8fSBarry Smith   /* fix right hand side if needed */
237497b48c8fSBarry Smith   if (x && b) {
23759566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
23769566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
237756777dd2SBarry Smith     vecs = PETSC_TRUE;
237897b48c8fSBarry Smith   }
237997b48c8fSBarry Smith 
238097b48c8fSBarry Smith   /* zero the columns */
23819566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
238297b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
23835f80ce2aSJacob Faibussowitsch     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
238497b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
238597b48c8fSBarry Smith   }
238697b48c8fSBarry Smith   for (i = 0; i < A->rmap->N; i++) {
238797b48c8fSBarry Smith     if (!zeroed[i]) {
238897b48c8fSBarry Smith       row = i / bs;
238997b48c8fSBarry Smith       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
239097b48c8fSBarry Smith         for (k = 0; k < bs; k++) {
239197b48c8fSBarry Smith           col = bs * baij->j[j] + k;
239297b48c8fSBarry Smith           if (zeroed[col]) {
239397b48c8fSBarry Smith             aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
239456777dd2SBarry Smith             if (vecs) bb[i] -= aa[0] * xx[col];
239597b48c8fSBarry Smith             aa[0] = 0.0;
239697b48c8fSBarry Smith           }
239797b48c8fSBarry Smith         }
239897b48c8fSBarry Smith       }
239956777dd2SBarry Smith     } else if (vecs) bb[i] = diag * xx[i];
240097b48c8fSBarry Smith   }
24019566063dSJacob Faibussowitsch   PetscCall(PetscFree(zeroed));
240256777dd2SBarry Smith   if (vecs) {
24039566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
24049566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
240556777dd2SBarry Smith   }
240697b48c8fSBarry Smith 
240797b48c8fSBarry Smith   /* zero the rows */
240897b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
240997b48c8fSBarry Smith     row   = is_idx[i];
241097b48c8fSBarry Smith     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
241197b48c8fSBarry Smith     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
241297b48c8fSBarry Smith     for (k = 0; k < count; k++) {
241397b48c8fSBarry Smith       aa[0] = zero;
241497b48c8fSBarry Smith       aa += bs;
241597b48c8fSBarry Smith     }
2416dbbe0bcdSBarry Smith     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
241797b48c8fSBarry Smith   }
24189566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
241997b48c8fSBarry Smith   PetscFunctionReturn(0);
242097b48c8fSBarry Smith }
242197b48c8fSBarry Smith 
24229371c9d4SSatish Balay PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) {
24232d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2424e2ee6c50SBarry Smith   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2425c1ac3661SBarry Smith   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2426d0f46423SBarry Smith   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2427c1ac3661SBarry Smith   PetscInt     ridx, cidx, bs2                 = a->bs2;
2428ace3abfcSBarry Smith   PetscBool    roworiented = a->roworiented;
2429d8cdefa3SHong Zhang   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
24302d61bbb3SSatish Balay 
24312d61bbb3SSatish Balay   PetscFunctionBegin;
24322d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over added rows */
2433085a36d4SBarry Smith     row  = im[k];
2434085a36d4SBarry Smith     brow = row / bs;
24355ef9f2a5SBarry Smith     if (row < 0) continue;
24366bdcaf15SBarry Smith     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
24372d61bbb3SSatish Balay     rp = aj + ai[brow];
2438672ba085SHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[brow];
24392d61bbb3SSatish Balay     rmax = imax[brow];
24402d61bbb3SSatish Balay     nrow = ailen[brow];
24412d61bbb3SSatish Balay     low  = 0;
2442c71e6ed7SBarry Smith     high = nrow;
24432d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over added columns */
24445ef9f2a5SBarry Smith       if (in[l] < 0) continue;
24456bdcaf15SBarry Smith       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
24469371c9d4SSatish Balay       col  = in[l];
24479371c9d4SSatish Balay       bcol = col / bs;
24489371c9d4SSatish Balay       ridx = row % bs;
24499371c9d4SSatish Balay       cidx = col % bs;
2450672ba085SHong Zhang       if (!A->structure_only) {
24512d61bbb3SSatish Balay         if (roworiented) {
24525ef9f2a5SBarry Smith           value = v[l + k * n];
24532d61bbb3SSatish Balay         } else {
24542d61bbb3SSatish Balay           value = v[k + l * m];
24552d61bbb3SSatish Balay         }
2456672ba085SHong Zhang       }
24579371c9d4SSatish Balay       if (col <= lastcol) low = 0;
24589371c9d4SSatish Balay       else high = nrow;
2459e2ee6c50SBarry Smith       lastcol = col;
24602d61bbb3SSatish Balay       while (high - low > 7) {
24612d61bbb3SSatish Balay         t = (low + high) / 2;
24622d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
24632d61bbb3SSatish Balay         else low = t;
24642d61bbb3SSatish Balay       }
24652d61bbb3SSatish Balay       for (i = low; i < high; i++) {
24662d61bbb3SSatish Balay         if (rp[i] > bcol) break;
24672d61bbb3SSatish Balay         if (rp[i] == bcol) {
24682d61bbb3SSatish Balay           bap = ap + bs2 * i + bs * cidx + ridx;
2469672ba085SHong Zhang           if (!A->structure_only) {
24702d61bbb3SSatish Balay             if (is == ADD_VALUES) *bap += value;
24712d61bbb3SSatish Balay             else *bap = value;
2472672ba085SHong Zhang           }
24732d61bbb3SSatish Balay           goto noinsert1;
24742d61bbb3SSatish Balay         }
24752d61bbb3SSatish Balay       }
24762d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
24775f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2478672ba085SHong Zhang       if (A->structure_only) {
2479672ba085SHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2480672ba085SHong Zhang       } else {
2481fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2482672ba085SHong Zhang       }
24839371c9d4SSatish Balay       N = nrow++ - 1;
24849371c9d4SSatish Balay       high++;
24852d61bbb3SSatish Balay       /* shift up all the later entries in this row */
24869566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
24872d61bbb3SSatish Balay       rp[i] = bcol;
2488580bdb30SBarry Smith       if (!A->structure_only) {
24899566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
24909566063dSJacob Faibussowitsch         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2491580bdb30SBarry Smith         ap[bs2 * i + bs * cidx + ridx] = value;
2492580bdb30SBarry Smith       }
2493085a36d4SBarry Smith       a->nz++;
2494e56f5c9eSBarry Smith       A->nonzerostate++;
24952d61bbb3SSatish Balay     noinsert1:;
24962d61bbb3SSatish Balay       low = i;
24972d61bbb3SSatish Balay     }
24982d61bbb3SSatish Balay     ailen[brow] = nrow;
24992d61bbb3SSatish Balay   }
25002d61bbb3SSatish Balay   PetscFunctionReturn(0);
25012d61bbb3SSatish Balay }
25022d61bbb3SSatish Balay 
25039371c9d4SSatish Balay PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) {
25042d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
25052d61bbb3SSatish Balay   Mat          outA;
2506ace3abfcSBarry Smith   PetscBool    row_identity, col_identity;
25072d61bbb3SSatish Balay 
25082d61bbb3SSatish Balay   PetscFunctionBegin;
25095f80ce2aSJacob Faibussowitsch   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
25109566063dSJacob Faibussowitsch   PetscCall(ISIdentity(row, &row_identity));
25119566063dSJacob Faibussowitsch   PetscCall(ISIdentity(col, &col_identity));
25125f80ce2aSJacob Faibussowitsch   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
25132d61bbb3SSatish Balay 
25142d61bbb3SSatish Balay   outA            = inA;
2515d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
25169566063dSJacob Faibussowitsch   PetscCall(PetscFree(inA->solvertype));
25179566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
25182d61bbb3SSatish Balay 
25199566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2520cf242676SKris Buschelman 
25219566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)row));
25229566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
2523c3122656SLisandro Dalcin   a->row = row;
25249566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)col));
25259566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
2526c3122656SLisandro Dalcin   a->col = col;
2527c38d4ed2SBarry Smith 
2528c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
25299566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
25309566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2531c38d4ed2SBarry Smith 
25329566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2533*4dfa11a4SJacob Faibussowitsch   if (!a->solve_work) { PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); }
25349566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(outA, inA, info));
25352d61bbb3SSatish Balay   PetscFunctionReturn(0);
25362d61bbb3SSatish Balay }
2537d9b7c43dSSatish Balay 
25389371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, PetscInt *indices) {
253927a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2540bdb1c0e1SJed Brown   PetscInt     i, nz, mbs;
254127a8da17SBarry Smith 
254227a8da17SBarry Smith   PetscFunctionBegin;
2543b32cb4a7SJed Brown   nz  = baij->maxnz;
2544bdb1c0e1SJed Brown   mbs = baij->mbs;
2545ad540459SPierre Jolivet   for (i = 0; i < nz; i++) baij->j[i] = indices[i];
254627a8da17SBarry Smith   baij->nz = nz;
2547ad540459SPierre Jolivet   for (i = 0; i < mbs; i++) baij->ilen[i] = baij->imax[i];
254827a8da17SBarry Smith   PetscFunctionReturn(0);
254927a8da17SBarry Smith }
255027a8da17SBarry Smith 
255127a8da17SBarry Smith /*@
255211a5261eSBarry Smith     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix.
255327a8da17SBarry Smith 
255427a8da17SBarry Smith   Input Parameters:
255511a5261eSBarry Smith +  mat - the `MATSEQBAIJ` matrix
255627a8da17SBarry Smith -  indices - the column indices
255727a8da17SBarry Smith 
255815091d37SBarry Smith   Level: advanced
255915091d37SBarry Smith 
256027a8da17SBarry Smith   Notes:
256127a8da17SBarry Smith     This can be called if you have precomputed the nonzero structure of the
256227a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
256311a5261eSBarry Smith   of the `MatSetValues()` operation.
256427a8da17SBarry Smith 
256527a8da17SBarry Smith     You MUST have set the correct numbers of nonzeros per row in the call to
256611a5261eSBarry Smith   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
256727a8da17SBarry Smith 
256811a5261eSBarry Smith     MUST be called before any calls to `MatSetValues()`
256927a8da17SBarry Smith 
257011a5261eSBarry Smith .seealso: `MATSEQBAIJ`, `MatSetValues()`
257127a8da17SBarry Smith @*/
25729371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) {
257327a8da17SBarry Smith   PetscFunctionBegin;
25740700a824SBarry Smith   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2575dadcf809SJacob Faibussowitsch   PetscValidIntPointer(indices, 2);
2576cac4c232SBarry Smith   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices));
257727a8da17SBarry Smith   PetscFunctionReturn(0);
257827a8da17SBarry Smith }
257927a8da17SBarry Smith 
25809371c9d4SSatish Balay PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) {
2581273d9f13SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2582c1ac3661SBarry Smith   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2583273d9f13SBarry Smith   PetscReal    atmp;
258487828ca2SBarry Smith   PetscScalar *x, zero = 0.0;
2585273d9f13SBarry Smith   MatScalar   *aa;
2586c1ac3661SBarry Smith   PetscInt     ncols, brow, krow, kcol;
2587273d9f13SBarry Smith 
2588273d9f13SBarry Smith   PetscFunctionBegin;
25895f80ce2aSJacob Faibussowitsch   /* why is this not a macro???????????????????????????????????????????????????????????????? */
25905f80ce2aSJacob Faibussowitsch   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2591d0f46423SBarry Smith   bs  = A->rmap->bs;
2592273d9f13SBarry Smith   aa  = a->a;
2593273d9f13SBarry Smith   ai  = a->i;
2594273d9f13SBarry Smith   aj  = a->j;
2595273d9f13SBarry Smith   mbs = a->mbs;
2596273d9f13SBarry Smith 
25979566063dSJacob Faibussowitsch   PetscCall(VecSet(v, zero));
25989566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
25999566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(v, &n));
26005f80ce2aSJacob Faibussowitsch   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2601273d9f13SBarry Smith   for (i = 0; i < mbs; i++) {
26029371c9d4SSatish Balay     ncols = ai[1] - ai[0];
26039371c9d4SSatish Balay     ai++;
2604273d9f13SBarry Smith     brow = bs * i;
2605273d9f13SBarry Smith     for (j = 0; j < ncols; j++) {
2606273d9f13SBarry Smith       for (kcol = 0; kcol < bs; kcol++) {
2607273d9f13SBarry Smith         for (krow = 0; krow < bs; krow++) {
26089371c9d4SSatish Balay           atmp = PetscAbsScalar(*aa);
26099371c9d4SSatish Balay           aa++;
2610273d9f13SBarry Smith           row = brow + krow; /* row index */
26119371c9d4SSatish Balay           if (PetscAbsScalar(x[row]) < atmp) {
26129371c9d4SSatish Balay             x[row] = atmp;
26139371c9d4SSatish Balay             if (idx) idx[row] = bs * (*aj) + kcol;
26149371c9d4SSatish Balay           }
2615273d9f13SBarry Smith         }
2616273d9f13SBarry Smith       }
2617273d9f13SBarry Smith       aj++;
2618273d9f13SBarry Smith     }
2619273d9f13SBarry Smith   }
26209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
2621273d9f13SBarry Smith   PetscFunctionReturn(0);
2622273d9f13SBarry Smith }
2623273d9f13SBarry Smith 
26249371c9d4SSatish Balay PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) {
26253c896bc6SHong Zhang   PetscFunctionBegin;
26263c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
26273c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
26283c896bc6SHong Zhang     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
26293c896bc6SHong Zhang     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2630d88c0aacSHong Zhang     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
26313c896bc6SHong Zhang 
26325f80ce2aSJacob Faibussowitsch     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
26335f80ce2aSJacob Faibussowitsch     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
26349566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
26359566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)B));
26363c896bc6SHong Zhang   } else {
26379566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
26383c896bc6SHong Zhang   }
26393c896bc6SHong Zhang   PetscFunctionReturn(0);
26403c896bc6SHong Zhang }
26413c896bc6SHong Zhang 
26429371c9d4SSatish Balay PetscErrorCode MatSetUp_SeqBAIJ(Mat A) {
2643273d9f13SBarry Smith   PetscFunctionBegin;
26449566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL));
2645273d9f13SBarry Smith   PetscFunctionReturn(0);
2646273d9f13SBarry Smith }
2647273d9f13SBarry Smith 
26489371c9d4SSatish Balay static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) {
2649f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
26506e111a19SKarl Rupp 
2651f2a5309cSSatish Balay   PetscFunctionBegin;
2652f2a5309cSSatish Balay   *array = a->a;
2653f2a5309cSSatish Balay   PetscFunctionReturn(0);
2654f2a5309cSSatish Balay }
2655f2a5309cSSatish Balay 
26569371c9d4SSatish Balay static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) {
2657f2a5309cSSatish Balay   PetscFunctionBegin;
2658cda14afcSprj-   *array = NULL;
2659f2a5309cSSatish Balay   PetscFunctionReturn(0);
2660f2a5309cSSatish Balay }
2661f2a5309cSSatish Balay 
26629371c9d4SSatish Balay PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) {
2663b264fe52SHong Zhang   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
266452768537SHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
266552768537SHong Zhang   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
266652768537SHong Zhang 
266752768537SHong Zhang   PetscFunctionBegin;
266852768537SHong Zhang   /* Set the number of nonzeros in the new matrix */
26699566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
267052768537SHong Zhang   PetscFunctionReturn(0);
267152768537SHong Zhang }
267252768537SHong Zhang 
26739371c9d4SSatish Balay PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
267442ee4b1aSHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
267531ce2d13SHong Zhang   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2676e838b9e7SJed Brown   PetscBLASInt one = 1;
267742ee4b1aSHong Zhang 
267842ee4b1aSHong Zhang   PetscFunctionBegin;
2679134adf20SPierre Jolivet   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2680134adf20SPierre Jolivet     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2681134adf20SPierre Jolivet     if (e) {
26829566063dSJacob Faibussowitsch       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2683134adf20SPierre Jolivet       if (e) {
26849566063dSJacob Faibussowitsch         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2685134adf20SPierre Jolivet         if (e) str = SAME_NONZERO_PATTERN;
2686134adf20SPierre Jolivet       }
2687134adf20SPierre Jolivet     }
268854c59aa7SJacob Faibussowitsch     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2689134adf20SPierre Jolivet   }
269042ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2691f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2692c5df96a5SBarry Smith     PetscBLASInt bnz;
26939566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2694792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
26959566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2696ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
26979566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
269842ee4b1aSHong Zhang   } else {
269952768537SHong Zhang     Mat       B;
270052768537SHong Zhang     PetscInt *nnz;
270154c59aa7SJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
27029566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
27039566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
27049566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
27059566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
27069566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
27079566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
27089566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
27099566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
27109566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
27119566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
27129566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
271342ee4b1aSHong Zhang   }
271442ee4b1aSHong Zhang   PetscFunctionReturn(0);
271542ee4b1aSHong Zhang }
271642ee4b1aSHong Zhang 
27179371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) {
27182726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX)
27192726fb6dSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27202726fb6dSPierre Jolivet   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
27212726fb6dSPierre Jolivet   MatScalar   *aa = a->a;
27222726fb6dSPierre Jolivet 
27232726fb6dSPierre Jolivet   PetscFunctionBegin;
27242726fb6dSPierre Jolivet   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
27252726fb6dSPierre Jolivet #else
27262726fb6dSPierre Jolivet   PetscFunctionBegin;
27272726fb6dSPierre Jolivet #endif
27282726fb6dSPierre Jolivet   PetscFunctionReturn(0);
27292726fb6dSPierre Jolivet }
27302726fb6dSPierre Jolivet 
27319371c9d4SSatish Balay PetscErrorCode MatRealPart_SeqBAIJ(Mat A) {
273299cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
273399cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2734dd6ea824SBarry Smith   MatScalar   *aa = a->a;
273599cafbc1SBarry Smith 
273699cafbc1SBarry Smith   PetscFunctionBegin;
273799cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
273899cafbc1SBarry Smith   PetscFunctionReturn(0);
273999cafbc1SBarry Smith }
274099cafbc1SBarry Smith 
27419371c9d4SSatish Balay PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) {
274299cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
274399cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2744dd6ea824SBarry Smith   MatScalar   *aa = a->a;
274599cafbc1SBarry Smith 
274699cafbc1SBarry Smith   PetscFunctionBegin;
274799cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
274899cafbc1SBarry Smith   PetscFunctionReturn(0);
274999cafbc1SBarry Smith }
275099cafbc1SBarry Smith 
27513acb8795SBarry Smith /*
27522479783cSJose E. Roman     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
27533acb8795SBarry Smith */
27549371c9d4SSatish Balay PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
27553acb8795SBarry Smith   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
27563acb8795SBarry Smith   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
27573acb8795SBarry Smith   PetscInt     nz = a->i[m], row, *jj, mr, col;
27583acb8795SBarry Smith 
27593acb8795SBarry Smith   PetscFunctionBegin;
27603acb8795SBarry Smith   *nn = n;
27613acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
27625f80ce2aSJacob Faibussowitsch   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
27639566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
27649566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
27659566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
27663acb8795SBarry Smith   jj = a->j;
2767ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
27683acb8795SBarry Smith   cia[0] = oshift;
2769ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
27709566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
27713acb8795SBarry Smith   jj = a->j;
27723acb8795SBarry Smith   for (row = 0; row < m; row++) {
27733acb8795SBarry Smith     mr = a->i[row + 1] - a->i[row];
27743acb8795SBarry Smith     for (i = 0; i < mr; i++) {
27753acb8795SBarry Smith       col = *jj++;
277626fbe8dcSKarl Rupp 
27773acb8795SBarry Smith       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
27783acb8795SBarry Smith     }
27793acb8795SBarry Smith   }
27809566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
27819371c9d4SSatish Balay   *ia = cia;
27829371c9d4SSatish Balay   *ja = cja;
27833acb8795SBarry Smith   PetscFunctionReturn(0);
27843acb8795SBarry Smith }
27853acb8795SBarry Smith 
27869371c9d4SSatish Balay PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
27873acb8795SBarry Smith   PetscFunctionBegin;
27883acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
27899566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ia));
27909566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ja));
27913acb8795SBarry Smith   PetscFunctionReturn(0);
27923acb8795SBarry Smith }
27933acb8795SBarry Smith 
2794525d23c0SHong Zhang /*
2795525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2796525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2797040ebd07SHong Zhang  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2798525d23c0SHong Zhang  */
27999371c9d4SSatish Balay PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) {
2800525d23c0SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2801c0349474SHong Zhang   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2802525d23c0SHong Zhang   PetscInt     nz = a->i[m], row, *jj, mr, col;
2803525d23c0SHong Zhang   PetscInt    *cspidx;
2804f6d58c54SBarry Smith 
2805f6d58c54SBarry Smith   PetscFunctionBegin;
2806525d23c0SHong Zhang   *nn = n;
2807525d23c0SHong Zhang   if (!ia) PetscFunctionReturn(0);
2808f6d58c54SBarry Smith 
28099566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28129566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cspidx));
2813525d23c0SHong Zhang   jj = a->j;
2814ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2815525d23c0SHong Zhang   cia[0] = oshift;
2816ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28179566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
2818525d23c0SHong Zhang   jj = a->j;
2819525d23c0SHong Zhang   for (row = 0; row < m; row++) {
2820525d23c0SHong Zhang     mr = a->i[row + 1] - a->i[row];
2821525d23c0SHong Zhang     for (i = 0; i < mr; i++) {
2822525d23c0SHong Zhang       col                                         = *jj++;
2823525d23c0SHong Zhang       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2824525d23c0SHong Zhang       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2825525d23c0SHong Zhang     }
2826525d23c0SHong Zhang   }
28279566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
2828071fcb05SBarry Smith   *ia    = cia;
2829071fcb05SBarry Smith   *ja    = cja;
2830525d23c0SHong Zhang   *spidx = cspidx;
2831525d23c0SHong Zhang   PetscFunctionReturn(0);
2832f6d58c54SBarry Smith }
2833f6d58c54SBarry Smith 
28349371c9d4SSatish Balay PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) {
2835525d23c0SHong Zhang   PetscFunctionBegin;
28369566063dSJacob Faibussowitsch   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
28379566063dSJacob Faibussowitsch   PetscCall(PetscFree(*spidx));
2838f6d58c54SBarry Smith   PetscFunctionReturn(0);
2839f6d58c54SBarry Smith }
284099cafbc1SBarry Smith 
28419371c9d4SSatish Balay PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) {
28427d68702bSBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
28437d68702bSBarry Smith 
28447d68702bSBarry Smith   PetscFunctionBegin;
284548a46eb9SPierre Jolivet   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
28469566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
28477d68702bSBarry Smith   PetscFunctionReturn(0);
28487d68702bSBarry Smith }
28497d68702bSBarry Smith 
28502593348eSBarry Smith /* -------------------------------------------------------------------*/
/*
   MatOps_Values - positional table of the operations implemented for SeqBAIJ matrices.

   The initializers are positional, so they are matched to the members of struct _MatOps purely by
   order: an entry must never be inserted, removed or moved without keeping that correspondence.
   The numeric markers give the slot index of the entry they precede; NULL means this table
   supplies no implementation for that slot.

   NOTE(review): MatSeqBAIJSetPreallocation_SeqBAIJ() later in this file assigns B->ops->mult,
   B->ops->multadd and B->ops->sor, so the MatMult_SeqBAIJ_N / MatMultAdd_SeqBAIJ_N entries below
   look like defaults rather than the final choice -- confirm where this table is copied into a matrix.
*/
28519371c9d4SSatish Balay static struct _MatOps MatOps_Values = {
28529371c9d4SSatish Balay   MatSetValues_SeqBAIJ,
2853cc2dc46cSBarry Smith   MatGetRow_SeqBAIJ,
2854cc2dc46cSBarry Smith   MatRestoreRow_SeqBAIJ,
2855cc2dc46cSBarry Smith   MatMult_SeqBAIJ_N,
285697304618SKris Buschelman   /* 4*/ MatMultAdd_SeqBAIJ_N,
28577c922b88SBarry Smith   MatMultTranspose_SeqBAIJ,
28587c922b88SBarry Smith   MatMultTransposeAdd_SeqBAIJ,
2859f4259b30SLisandro Dalcin   NULL,
2860f4259b30SLisandro Dalcin   NULL,
2861f4259b30SLisandro Dalcin   NULL,
2862f4259b30SLisandro Dalcin   /* 10*/ NULL,
2863cc2dc46cSBarry Smith   MatLUFactor_SeqBAIJ,
2864f4259b30SLisandro Dalcin   NULL,
2865f4259b30SLisandro Dalcin   NULL,
2866f2501298SSatish Balay   MatTranspose_SeqBAIJ,
286797304618SKris Buschelman   /* 15*/ MatGetInfo_SeqBAIJ,
2868cc2dc46cSBarry Smith   MatEqual_SeqBAIJ,
2869cc2dc46cSBarry Smith   MatGetDiagonal_SeqBAIJ,
2870cc2dc46cSBarry Smith   MatDiagonalScale_SeqBAIJ,
2871cc2dc46cSBarry Smith   MatNorm_SeqBAIJ,
2872f4259b30SLisandro Dalcin   /* 20*/ NULL,
2873cc2dc46cSBarry Smith   MatAssemblyEnd_SeqBAIJ,
2874cc2dc46cSBarry Smith   MatSetOption_SeqBAIJ,
2875cc2dc46cSBarry Smith   MatZeroEntries_SeqBAIJ,
2876d519adbfSMatthew Knepley   /* 24*/ MatZeroRows_SeqBAIJ,
2877f4259b30SLisandro Dalcin   NULL,
2878f4259b30SLisandro Dalcin   NULL,
2879f4259b30SLisandro Dalcin   NULL,
2880f4259b30SLisandro Dalcin   NULL,
28814994cf47SJed Brown   /* 29*/ MatSetUp_SeqBAIJ,
2882f4259b30SLisandro Dalcin   NULL,
2883f4259b30SLisandro Dalcin   NULL,
2884f4259b30SLisandro Dalcin   NULL,
2885f4259b30SLisandro Dalcin   NULL,
2886d519adbfSMatthew Knepley   /* 34*/ MatDuplicate_SeqBAIJ,
2887f4259b30SLisandro Dalcin   NULL,
2888f4259b30SLisandro Dalcin   NULL,
2889cc2dc46cSBarry Smith   MatILUFactor_SeqBAIJ,
2890f4259b30SLisandro Dalcin   NULL,
2891d519adbfSMatthew Knepley   /* 39*/ MatAXPY_SeqBAIJ,
28927dae84e0SHong Zhang   MatCreateSubMatrices_SeqBAIJ,
2893cc2dc46cSBarry Smith   MatIncreaseOverlap_SeqBAIJ,
2894cc2dc46cSBarry Smith   MatGetValues_SeqBAIJ,
28953c896bc6SHong Zhang   MatCopy_SeqBAIJ,
2896f4259b30SLisandro Dalcin   /* 44*/ NULL,
2897cc2dc46cSBarry Smith   MatScale_SeqBAIJ,
28987d68702bSBarry Smith   MatShift_SeqBAIJ,
2899f4259b30SLisandro Dalcin   NULL,
290097b48c8fSBarry Smith   MatZeroRowsColumns_SeqBAIJ,
2901f4259b30SLisandro Dalcin   /* 49*/ NULL,
29023b2fbd54SBarry Smith   MatGetRowIJ_SeqBAIJ,
290392c4ed94SBarry Smith   MatRestoreRowIJ_SeqBAIJ,
29043acb8795SBarry Smith   MatGetColumnIJ_SeqBAIJ,
29053acb8795SBarry Smith   MatRestoreColumnIJ_SeqBAIJ,
290693dfae19SHong Zhang   /* 54*/ MatFDColoringCreate_SeqXAIJ,
2907f4259b30SLisandro Dalcin   NULL,
2908f4259b30SLisandro Dalcin   NULL,
2909090001bdSToby Isaac   NULL,
2910d3825aa8SBarry Smith   MatSetValuesBlocked_SeqBAIJ,
29117dae84e0SHong Zhang   /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2912b9b97703SBarry Smith   MatDestroy_SeqBAIJ,
2913b9b97703SBarry Smith   MatView_SeqBAIJ,
2914f4259b30SLisandro Dalcin   NULL,
2915f4259b30SLisandro Dalcin   NULL,
2916f4259b30SLisandro Dalcin   /* 64*/ NULL,
2917f4259b30SLisandro Dalcin   NULL,
2918f4259b30SLisandro Dalcin   NULL,
2919f4259b30SLisandro Dalcin   NULL,
2920f4259b30SLisandro Dalcin   NULL,
2921d519adbfSMatthew Knepley   /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2922f4259b30SLisandro Dalcin   NULL,
2923c87e5d42SMatthew Knepley   MatConvert_Basic,
2924f4259b30SLisandro Dalcin   NULL,
2925f4259b30SLisandro Dalcin   NULL,
2926f4259b30SLisandro Dalcin   /* 74*/ NULL,
2927f6d58c54SBarry Smith   MatFDColoringApply_BAIJ,
2928f4259b30SLisandro Dalcin   NULL,
2929f4259b30SLisandro Dalcin   NULL,
2930f4259b30SLisandro Dalcin   NULL,
2931f4259b30SLisandro Dalcin   /* 79*/ NULL,
2932f4259b30SLisandro Dalcin   NULL,
2933f4259b30SLisandro Dalcin   NULL,
2934f4259b30SLisandro Dalcin   NULL,
29355bba2384SShri Abhyankar   MatLoad_SeqBAIJ,
2936f4259b30SLisandro Dalcin   /* 84*/ NULL,
2937f4259b30SLisandro Dalcin   NULL,
2938f4259b30SLisandro Dalcin   NULL,
2939f4259b30SLisandro Dalcin   NULL,
2940f4259b30SLisandro Dalcin   NULL,
2941f4259b30SLisandro Dalcin   /* 89*/ NULL,
2942f4259b30SLisandro Dalcin   NULL,
2943f4259b30SLisandro Dalcin   NULL,
2944f4259b30SLisandro Dalcin   NULL,
2945f4259b30SLisandro Dalcin   NULL,
2946f4259b30SLisandro Dalcin   /* 94*/ NULL,
2947f4259b30SLisandro Dalcin   NULL,
2948f4259b30SLisandro Dalcin   NULL,
2949f4259b30SLisandro Dalcin   NULL,
2950f4259b30SLisandro Dalcin   NULL,
2951f4259b30SLisandro Dalcin   /* 99*/ NULL,
2952f4259b30SLisandro Dalcin   NULL,
2953f4259b30SLisandro Dalcin   NULL,
29542726fb6dSPierre Jolivet   MatConjugate_SeqBAIJ,
2955f4259b30SLisandro Dalcin   NULL,
2956f4259b30SLisandro Dalcin   /*104*/ NULL,
295799cafbc1SBarry Smith   MatRealPart_SeqBAIJ,
29582af78befSBarry Smith   MatImaginaryPart_SeqBAIJ,
2959f4259b30SLisandro Dalcin   NULL,
2960f4259b30SLisandro Dalcin   NULL,
2961f4259b30SLisandro Dalcin   /*109*/ NULL,
2962f4259b30SLisandro Dalcin   NULL,
2963f4259b30SLisandro Dalcin   NULL,
2964f4259b30SLisandro Dalcin   NULL,
2965547795f9SHong Zhang   MatMissingDiagonal_SeqBAIJ,
2966f4259b30SLisandro Dalcin   /*114*/ NULL,
2967f4259b30SLisandro Dalcin   NULL,
2968f4259b30SLisandro Dalcin   NULL,
2969f4259b30SLisandro Dalcin   NULL,
2970f4259b30SLisandro Dalcin   NULL,
2971f4259b30SLisandro Dalcin   /*119*/ NULL,
2972f4259b30SLisandro Dalcin   NULL,
2973547795f9SHong Zhang   MatMultHermitianTranspose_SeqBAIJ,
2974d6037b41SHong Zhang   MatMultHermitianTransposeAdd_SeqBAIJ,
2975f4259b30SLisandro Dalcin   NULL,
2976f4259b30SLisandro Dalcin   /*124*/ NULL,
2977857cbf51SRichard Tran Mills   MatGetColumnReductions_SeqBAIJ,
29783964eb88SJed Brown   MatInvertBlockDiagonal_SeqBAIJ,
2979f4259b30SLisandro Dalcin   NULL,
2980f4259b30SLisandro Dalcin   NULL,
2981f4259b30SLisandro Dalcin   /*129*/ NULL,
2982f4259b30SLisandro Dalcin   NULL,
2983f4259b30SLisandro Dalcin   NULL,
2984f4259b30SLisandro Dalcin   NULL,
2985f4259b30SLisandro Dalcin   NULL,
2986f4259b30SLisandro Dalcin   /*134*/ NULL,
2987f4259b30SLisandro Dalcin   NULL,
2988f4259b30SLisandro Dalcin   NULL,
2989f4259b30SLisandro Dalcin   NULL,
2990f4259b30SLisandro Dalcin   NULL,
299146533700Sstefano_zampini   /*139*/ MatSetBlockSizes_Default,
2992f4259b30SLisandro Dalcin   NULL,
2993f4259b30SLisandro Dalcin   NULL,
2994bdf6f3fcSHong Zhang   MatFDColoringSetUp_SeqXAIJ,
2995f4259b30SLisandro Dalcin   NULL,
299686e85357SHong Zhang   /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2997d70f29a3SPierre Jolivet   MatDestroySubMatrices_SeqBAIJ,
2998d70f29a3SPierre Jolivet   NULL,
299999a7f59eSMark Adams   NULL,
300099a7f59eSMark Adams   NULL,
30017fb60732SBarry Smith   NULL,
30027fb60732SBarry Smith   /*150*/ NULL,
300399cafbc1SBarry Smith };
30042593348eSBarry Smith 
30059371c9d4SSatish Balay PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) {
30063e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
30078ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
30083e90b805SBarry Smith 
30093e90b805SBarry Smith   PetscFunctionBegin;
30105f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
30113e90b805SBarry Smith 
30123e90b805SBarry Smith   /* allocate space for values if not already there */
3013*4dfa11a4SJacob Faibussowitsch   if (!aij->saved_values) { PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); }
30143e90b805SBarry Smith 
30153e90b805SBarry Smith   /* copy values over */
30169566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
30173e90b805SBarry Smith   PetscFunctionReturn(0);
30183e90b805SBarry Smith }
30193e90b805SBarry Smith 
30209371c9d4SSatish Balay PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) {
30213e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
30228ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
30233e90b805SBarry Smith 
30243e90b805SBarry Smith   PetscFunctionBegin;
30255f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
30265f80ce2aSJacob Faibussowitsch   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
30273e90b805SBarry Smith 
30283e90b805SBarry Smith   /* copy values over */
30299566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
30303e90b805SBarry Smith   PetscFunctionReturn(0);
30313e90b805SBarry Smith }
30323e90b805SBarry Smith 
/* Prototypes of two MatConvert routines (SeqBAIJ -> SeqAIJ and SeqBAIJ -> SeqSBAIJ, going by their names);
   they are only declared here -- no definition appears in the part of the file reviewed */
3033cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3034cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3035273d9f13SBarry Smith 
30369371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) {
3037a23d5eceSKris Buschelman   Mat_SeqBAIJ *b;
3038535b19f3SBarry Smith   PetscInt     i, mbs, nbs, bs2;
30398afaa268SBarry Smith   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3040a23d5eceSKris Buschelman 
3041a23d5eceSKris Buschelman   PetscFunctionBegin;
30422576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3043ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
3044ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
3045ab93d7beSBarry Smith     nz             = 0;
3046ab93d7beSBarry Smith   }
30478c07d4e3SBarry Smith 
30489566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
30499566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
30509566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
30519566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3052899cda47SBarry Smith 
3053899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
3054899cda47SBarry Smith 
3055d0f46423SBarry Smith   mbs = B->rmap->n / bs;
3056d0f46423SBarry Smith   nbs = B->cmap->n / bs;
3057a23d5eceSKris Buschelman   bs2 = bs * bs;
3058a23d5eceSKris Buschelman 
30595f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3060a23d5eceSKris Buschelman 
3061a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
30625f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3063a23d5eceSKris Buschelman   if (nnz) {
3064a23d5eceSKris Buschelman     for (i = 0; i < mbs; i++) {
30655f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
30665f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3067a23d5eceSKris Buschelman     }
3068a23d5eceSKris Buschelman   }
3069a23d5eceSKris Buschelman 
3070a23d5eceSKris Buschelman   b = (Mat_SeqBAIJ *)B->data;
3071d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
30729566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3073d0609cedSBarry Smith   PetscOptionsEnd();
30748c07d4e3SBarry Smith 
3075a23d5eceSKris Buschelman   if (!flg) {
3076a23d5eceSKris Buschelman     switch (bs) {
3077a23d5eceSKris Buschelman     case 1:
3078a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
3079a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3080a23d5eceSKris Buschelman       break;
3081a23d5eceSKris Buschelman     case 2:
3082a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
3083a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3084a23d5eceSKris Buschelman       break;
3085a23d5eceSKris Buschelman     case 3:
3086a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
3087a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3088a23d5eceSKris Buschelman       break;
3089a23d5eceSKris Buschelman     case 4:
3090a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
3091a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3092a23d5eceSKris Buschelman       break;
3093a23d5eceSKris Buschelman     case 5:
3094a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
3095a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3096a23d5eceSKris Buschelman       break;
3097a23d5eceSKris Buschelman     case 6:
3098a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
3099a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3100a23d5eceSKris Buschelman       break;
3101a23d5eceSKris Buschelman     case 7:
3102a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
3103a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3104a23d5eceSKris Buschelman       break;
31059371c9d4SSatish Balay     case 9: {
31066679dcc1SBarry Smith       PetscInt version = 1;
31079566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
31086679dcc1SBarry Smith       switch (version) {
31095f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
31106679dcc1SBarry Smith       case 1:
311196e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
311296e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
31139566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
31146679dcc1SBarry Smith         break;
31156679dcc1SBarry Smith #endif
31166679dcc1SBarry Smith       default:
311796e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_N;
311896e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
31199566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
312096e086a2SDaniel Kokron         break;
31216679dcc1SBarry Smith       }
31226679dcc1SBarry Smith       break;
31236679dcc1SBarry Smith     }
3124ebada01fSBarry Smith     case 11:
3125ebada01fSBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_11;
3126ebada01fSBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3127ebada01fSBarry Smith       break;
31289371c9d4SSatish Balay     case 12: {
31296679dcc1SBarry Smith       PetscInt version = 1;
31309566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
31316679dcc1SBarry Smith       switch (version) {
31326679dcc1SBarry Smith       case 1:
31336679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
31346679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
31359566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
31368ab949d8SShri Abhyankar         break;
31376679dcc1SBarry Smith       case 2:
31386679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
31396679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
31409566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
31416679dcc1SBarry Smith         break;
31426679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
31436679dcc1SBarry Smith       case 3:
31446679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
31456679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
31469566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
31476679dcc1SBarry Smith         break;
31486679dcc1SBarry Smith #endif
3149a23d5eceSKris Buschelman       default:
3150a23d5eceSKris Buschelman         B->ops->mult    = MatMult_SeqBAIJ_N;
3151a23d5eceSKris Buschelman         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
31529566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
31536679dcc1SBarry Smith         break;
31546679dcc1SBarry Smith       }
31556679dcc1SBarry Smith       break;
31566679dcc1SBarry Smith     }
31579371c9d4SSatish Balay     case 15: {
31586679dcc1SBarry Smith       PetscInt version = 1;
31599566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
31606679dcc1SBarry Smith       switch (version) {
31616679dcc1SBarry Smith       case 1:
31626679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
31639566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
31646679dcc1SBarry Smith         break;
31656679dcc1SBarry Smith       case 2:
31666679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
31679566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
31686679dcc1SBarry Smith         break;
31696679dcc1SBarry Smith       case 3:
31706679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
31719566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
31726679dcc1SBarry Smith         break;
31736679dcc1SBarry Smith       case 4:
31746679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
31759566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
31766679dcc1SBarry Smith         break;
31776679dcc1SBarry Smith       default:
31786679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_N;
31799566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
31806679dcc1SBarry Smith         break;
31816679dcc1SBarry Smith       }
31826679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
31836679dcc1SBarry Smith       break;
31846679dcc1SBarry Smith     }
31856679dcc1SBarry Smith     default:
31866679dcc1SBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_N;
31876679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
31889566063dSJacob Faibussowitsch       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3189a23d5eceSKris Buschelman       break;
3190a23d5eceSKris Buschelman     }
3191a23d5eceSKris Buschelman   }
3192e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
3193a23d5eceSKris Buschelman   b->mbs      = mbs;
3194a23d5eceSKris Buschelman   b->nbs      = nbs;
3195ab93d7beSBarry Smith   if (!skipallocation) {
31962ee49352SLisandro Dalcin     if (!b->imax) {
31979566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
319826fbe8dcSKarl Rupp 
31994fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
32002ee49352SLisandro Dalcin     }
3201ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
320226fbe8dcSKarl Rupp     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3203a23d5eceSKris Buschelman     if (!nnz) {
3204a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3205c62bd62aSJed Brown       else if (nz < 0) nz = 1;
32065d2a9ed1SStefano Zampini       nz = PetscMin(nz, nbs);
3207a23d5eceSKris Buschelman       for (i = 0; i < mbs; i++) b->imax[i] = nz;
32089566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, mbs, &nz));
3209a23d5eceSKris Buschelman     } else {
3210c73702f5SBarry Smith       PetscInt64 nz64 = 0;
32119371c9d4SSatish Balay       for (i = 0; i < mbs; i++) {
32129371c9d4SSatish Balay         b->imax[i] = nnz[i];
32139371c9d4SSatish Balay         nz64 += nnz[i];
32149371c9d4SSatish Balay       }
32159566063dSJacob Faibussowitsch       PetscCall(PetscIntCast(nz64, &nz));
3216a23d5eceSKris Buschelman     }
3217a23d5eceSKris Buschelman 
3218a23d5eceSKris Buschelman     /* allocate the matrix space */
32199566063dSJacob Faibussowitsch     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3220672ba085SHong Zhang     if (B->structure_only) {
32219566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz, &b->j));
32229566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i));
3223672ba085SHong Zhang     } else {
32246679dcc1SBarry Smith       PetscInt nzbs2 = 0;
32259566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
32269566063dSJacob Faibussowitsch       PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i));
32279566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(b->a, nz * bs2));
3228672ba085SHong Zhang     }
32299566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(b->j, nz));
323026fbe8dcSKarl Rupp 
3231672ba085SHong Zhang     if (B->structure_only) {
3232672ba085SHong Zhang       b->singlemalloc = PETSC_FALSE;
3233672ba085SHong Zhang       b->free_a       = PETSC_FALSE;
3234672ba085SHong Zhang     } else {
3235a23d5eceSKris Buschelman       b->singlemalloc = PETSC_TRUE;
3236672ba085SHong Zhang       b->free_a       = PETSC_TRUE;
3237672ba085SHong Zhang     }
3238672ba085SHong Zhang     b->free_ij = PETSC_TRUE;
3239672ba085SHong Zhang 
3240a23d5eceSKris Buschelman     b->i[0] = 0;
3241ad540459SPierre Jolivet     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3242672ba085SHong Zhang 
3243e811da20SHong Zhang   } else {
3244e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3245e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3246ab93d7beSBarry Smith   }
3247a23d5eceSKris Buschelman 
3248a23d5eceSKris Buschelman   b->bs2              = bs2;
3249a23d5eceSKris Buschelman   b->mbs              = mbs;
3250a23d5eceSKris Buschelman   b->nz               = 0;
3251b32cb4a7SJed Brown   b->maxnz            = nz;
3252b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3253cb7b82ddSBarry Smith   B->was_assembled    = PETSC_FALSE;
3254cb7b82ddSBarry Smith   B->assembled        = PETSC_FALSE;
32559566063dSJacob Faibussowitsch   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3256a23d5eceSKris Buschelman   PetscFunctionReturn(0);
3257a23d5eceSKris Buschelman }
3258a23d5eceSKris Buschelman 
32599371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) {
3260725b52f3SLisandro Dalcin   PetscInt     i, m, nz, nz_max = 0, *nnz;
3261f4259b30SLisandro Dalcin   PetscScalar *values      = NULL;
3262d47bf9aaSJed Brown   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3263725b52f3SLisandro Dalcin 
3264725b52f3SLisandro Dalcin   PetscFunctionBegin;
32655f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
32669566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
32679566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
32689566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
32699566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
32709566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3271d0f46423SBarry Smith   m = B->rmap->n / bs;
3272725b52f3SLisandro Dalcin 
32735f80ce2aSJacob Faibussowitsch   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
32749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &nnz));
3275725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3276cf12db73SBarry Smith     nz = ii[i + 1] - ii[i];
32775f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3278725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3279725b52f3SLisandro Dalcin     nnz[i] = nz;
3280725b52f3SLisandro Dalcin   }
32819566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
32829566063dSJacob Faibussowitsch   PetscCall(PetscFree(nnz));
3283725b52f3SLisandro Dalcin 
3284725b52f3SLisandro Dalcin   values = (PetscScalar *)V;
328548a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3286725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3287cf12db73SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
3288cf12db73SBarry Smith     const PetscInt *icols = jj + ii[i];
3289bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {
3290cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
32919566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
32923adadaf3SJed Brown     } else {
32933adadaf3SJed Brown       PetscInt j;
32943adadaf3SJed Brown       for (j = 0; j < ncols; j++) {
32953adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
32969566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
32973adadaf3SJed Brown       }
32983adadaf3SJed Brown     }
3299725b52f3SLisandro Dalcin   }
33009566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
33019566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
33029566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
33039566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3304725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3305725b52f3SLisandro Dalcin }
3306725b52f3SLisandro Dalcin 
3307cda14afcSprj- /*@C
330811a5261eSBarry Smith    MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3309cda14afcSprj- 
3310cda14afcSprj-    Not Collective
3311cda14afcSprj- 
3312cda14afcSprj-    Input Parameter:
331311a5261eSBarry Smith .  mat - a `MATSEQBAIJ` matrix
3314cda14afcSprj- 
3315cda14afcSprj-    Output Parameter:
3316cda14afcSprj- .   array - pointer to the data
3317cda14afcSprj- 
3318cda14afcSprj-    Level: intermediate
3319cda14afcSprj- 
332011a5261eSBarry Smith .seealso: `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3321cda14afcSprj- @*/
33229371c9d4SSatish Balay PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) {
3323cda14afcSprj-   PetscFunctionBegin;
3324cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3325cda14afcSprj-   PetscFunctionReturn(0);
3326cda14afcSprj- }
3327cda14afcSprj- 
3328cda14afcSprj- /*@C
332911a5261eSBarry Smith    MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3330cda14afcSprj- 
3331cda14afcSprj-    Not Collective
3332cda14afcSprj- 
3333cda14afcSprj-    Input Parameters:
333411a5261eSBarry Smith +  mat - a `MATSEQBAIJ` matrix
3335cda14afcSprj- -  array - pointer to the data
3336cda14afcSprj- 
3337cda14afcSprj-    Level: intermediate
3338cda14afcSprj- 
3339db781477SPatrick Sanan .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3340cda14afcSprj- @*/
33419371c9d4SSatish Balay PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) {
3342cda14afcSprj-   PetscFunctionBegin;
3343cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3344cda14afcSprj-   PetscFunctionReturn(0);
3345cda14afcSprj- }
3346cda14afcSprj- 
/*MC
   MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
   block sparse compressed row format.

   Options Database Keys:
+ -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
- -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)

   Level: beginner

   Notes:
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

   Run with -info to see what version of the matrix-vector product is being used

.seealso: `MatCreateSeqBAIJ()`
M*/
33650bad9183SKris Buschelman 
3366cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3367b24902e0SBarry Smith 
33689371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) {
3369c1ac3661SBarry Smith   PetscMPIInt  size;
3370b6490206SBarry Smith   Mat_SeqBAIJ *b;
33713b2fbd54SBarry Smith 
33723a40ed3dSBarry Smith   PetscFunctionBegin;
33739566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
33745f80ce2aSJacob Faibussowitsch   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3375b6490206SBarry Smith 
3376*4dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
3377b0a32e0cSBarry Smith   B->data = (void *)b;
33789566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
337926fbe8dcSKarl Rupp 
3380f4259b30SLisandro Dalcin   b->row          = NULL;
3381f4259b30SLisandro Dalcin   b->col          = NULL;
3382f4259b30SLisandro Dalcin   b->icol         = NULL;
33832593348eSBarry Smith   b->reallocs     = 0;
3384f4259b30SLisandro Dalcin   b->saved_values = NULL;
33852593348eSBarry Smith 
3386c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
33872593348eSBarry Smith   b->nonew              = 0;
3388f4259b30SLisandro Dalcin   b->diag               = NULL;
3389f4259b30SLisandro Dalcin   B->spptr              = NULL;
3390b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3391a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
33924e220ebcSLois Curfman McInnes 
33939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
33949566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
33959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
33969566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
33979566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
33989566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
33999566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
34009566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
34019566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
34029566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
34037ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
34049566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
34057ea3e4caSstefano_zampini #endif
34069566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
34079566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
34083a40ed3dSBarry Smith   PetscFunctionReturn(0);
34092593348eSBarry Smith }
34102593348eSBarry Smith 
34119371c9d4SSatish Balay PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) {
3412b24902e0SBarry Smith   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3413a96a251dSBarry Smith   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3414de6a44a3SBarry Smith 
34153a40ed3dSBarry Smith   PetscFunctionBegin;
34165f80ce2aSJacob Faibussowitsch   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
34172593348eSBarry Smith 
34184fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
34194fd072dbSBarry Smith     c->imax           = a->imax;
34204fd072dbSBarry Smith     c->ilen           = a->ilen;
34214fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
34224fd072dbSBarry Smith   } else {
34239566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3424b6490206SBarry Smith     for (i = 0; i < mbs; i++) {
34252593348eSBarry Smith       c->imax[i] = a->imax[i];
34262593348eSBarry Smith       c->ilen[i] = a->ilen[i];
34272593348eSBarry Smith     }
34284fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
34294fd072dbSBarry Smith   }
34302593348eSBarry Smith 
34312593348eSBarry Smith   /* allocate the matrix space */
343216a2bf60SHong Zhang   if (mallocmatspace) {
34334fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
34349566063dSJacob Faibussowitsch       PetscCall(PetscCalloc1(bs2 * nz, &c->a));
343526fbe8dcSKarl Rupp 
34364fd072dbSBarry Smith       c->i            = a->i;
34374fd072dbSBarry Smith       c->j            = a->j;
3438379be0ddSLisandro Dalcin       c->singlemalloc = PETSC_FALSE;
3439379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
3440379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
34414fd072dbSBarry Smith       c->parent       = A;
34421e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
34431e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
344426fbe8dcSKarl Rupp 
34459566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)A));
34469566063dSJacob Faibussowitsch       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
34479566063dSJacob Faibussowitsch       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
34484fd072dbSBarry Smith     } else {
34499566063dSJacob Faibussowitsch       PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));
345026fbe8dcSKarl Rupp 
3451c4992f7dSBarry Smith       c->singlemalloc = PETSC_TRUE;
3452379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
34534fd072dbSBarry Smith       c->free_ij      = PETSC_TRUE;
345426fbe8dcSKarl Rupp 
34559566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3456b6490206SBarry Smith       if (mbs > 0) {
34579566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(c->j, a->j, nz));
34582e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
34599566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
34602e8a6d31SBarry Smith         } else {
34619566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(c->a, bs2 * nz));
34622593348eSBarry Smith         }
34632593348eSBarry Smith       }
34641e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
34651e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
346616a2bf60SHong Zhang     }
34674fd072dbSBarry Smith   }
346816a2bf60SHong Zhang 
34692593348eSBarry Smith   c->roworiented = a->roworiented;
34702593348eSBarry Smith   c->nonew       = a->nonew;
347126fbe8dcSKarl Rupp 
34729566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
34739566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
347426fbe8dcSKarl Rupp 
34755c9eb25fSBarry Smith   c->bs2 = a->bs2;
34765c9eb25fSBarry Smith   c->mbs = a->mbs;
34775c9eb25fSBarry Smith   c->nbs = a->nbs;
34782593348eSBarry Smith 
34792593348eSBarry Smith   if (a->diag) {
34804fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
34814fd072dbSBarry Smith       c->diag      = a->diag;
34824fd072dbSBarry Smith       c->free_diag = PETSC_FALSE;
34834fd072dbSBarry Smith     } else {
34849566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
348526fbe8dcSKarl Rupp       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
34864fd072dbSBarry Smith       c->free_diag = PETSC_TRUE;
34874fd072dbSBarry Smith     }
3488f4259b30SLisandro Dalcin   } else c->diag = NULL;
348926fbe8dcSKarl Rupp 
34902593348eSBarry Smith   c->nz         = a->nz;
3491f2cbd3d5SJed Brown   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3492f361c04dSBarry Smith   c->solve_work = NULL;
3493f361c04dSBarry Smith   c->mult_work  = NULL;
3494f361c04dSBarry Smith   c->sor_workt  = NULL;
3495f361c04dSBarry Smith   c->sor_work   = NULL;
349688e51ccdSHong Zhang 
349788e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
349888e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3499cd6b891eSBarry Smith   if (a->compressedrow.use) {
350088e51ccdSHong Zhang     i = a->compressedrow.nrows;
35019566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
35029566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
35039566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
350488e51ccdSHong Zhang   } else {
350588e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
35060298fd71SBarry Smith     c->compressedrow.i      = NULL;
35070298fd71SBarry Smith     c->compressedrow.rindex = NULL;
350888e51ccdSHong Zhang   }
3509e56f5c9eSBarry Smith   C->nonzerostate = A->nonzerostate;
351026fbe8dcSKarl Rupp 
35119566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
35123a40ed3dSBarry Smith   PetscFunctionReturn(0);
35132593348eSBarry Smith }
35142593348eSBarry Smith 
35159371c9d4SSatish Balay PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) {
3516b24902e0SBarry Smith   PetscFunctionBegin;
35179566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
35189566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
35199566063dSJacob Faibussowitsch   PetscCall(MatSetType(*B, MATSEQBAIJ));
35209566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3521b24902e0SBarry Smith   PetscFunctionReturn(0);
3522b24902e0SBarry Smith }
3523b24902e0SBarry Smith 
3524618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
35259371c9d4SSatish Balay PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) {
3526b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3527b51a4376SLisandro Dalcin   PetscInt    *rowidxs, *colidxs;
3528b51a4376SLisandro Dalcin   PetscScalar *matvals;
3529b51a4376SLisandro Dalcin 
3530b51a4376SLisandro Dalcin   PetscFunctionBegin;
35319566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
3532b51a4376SLisandro Dalcin 
3533b51a4376SLisandro Dalcin   /* read matrix header */
35349566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
35355f80ce2aSJacob Faibussowitsch   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
35369371c9d4SSatish Balay   M  = header[1];
35379371c9d4SSatish Balay   N  = header[2];
35389371c9d4SSatish Balay   nz = header[3];
35395f80ce2aSJacob Faibussowitsch   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
35405f80ce2aSJacob Faibussowitsch   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
35415f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3542b51a4376SLisandro Dalcin 
3543b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
35449566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3545b51a4376SLisandro Dalcin   /* set local and global sizes if not set already */
3546b51a4376SLisandro Dalcin   if (mat->rmap->n < 0) mat->rmap->n = M;
3547b51a4376SLisandro Dalcin   if (mat->cmap->n < 0) mat->cmap->n = N;
3548b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3549b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
35509566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
35519566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
3552b51a4376SLisandro Dalcin 
3553b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
35549566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &rows, &cols));
35555f80ce2aSJacob Faibussowitsch   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
35569566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
35579566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
35589371c9d4SSatish Balay   mbs = m / bs;
35599371c9d4SSatish Balay   nbs = n / bs;
3560b51a4376SLisandro Dalcin 
3561b51a4376SLisandro Dalcin   /* read in row lengths, column indices and nonzero values */
35629566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &rowidxs));
35639566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
35649371c9d4SSatish Balay   rowidxs[0] = 0;
35659371c9d4SSatish Balay   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3566b51a4376SLisandro Dalcin   sum = rowidxs[m];
35675f80ce2aSJacob Faibussowitsch   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3568b51a4376SLisandro Dalcin 
3569b51a4376SLisandro Dalcin   /* read in column indices and nonzero values */
35709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
35719566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
35729566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3573b51a4376SLisandro Dalcin 
3574b51a4376SLisandro Dalcin   {               /* preallocate matrix storage */
3575b51a4376SLisandro Dalcin     PetscBT   bt; /* helper bit set to count nonzeros */
3576b51a4376SLisandro Dalcin     PetscInt *nnz;
3577618cc2edSLisandro Dalcin     PetscBool sbaij;
3578b51a4376SLisandro Dalcin 
35799566063dSJacob Faibussowitsch     PetscCall(PetscBTCreate(nbs, &bt));
35809566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(mbs, &nnz));
35819566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3582b51a4376SLisandro Dalcin     for (i = 0; i < mbs; i++) {
35839566063dSJacob Faibussowitsch       PetscCall(PetscBTMemzero(nbs, bt));
3584618cc2edSLisandro Dalcin       for (k = 0; k < bs; k++) {
3585618cc2edSLisandro Dalcin         PetscInt row = bs * i + k;
3586618cc2edSLisandro Dalcin         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3587618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3588618cc2edSLisandro Dalcin           if (!sbaij || col >= row)
3589618cc2edSLisandro Dalcin             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3590618cc2edSLisandro Dalcin         }
3591618cc2edSLisandro Dalcin       }
3592b51a4376SLisandro Dalcin     }
35939566063dSJacob Faibussowitsch     PetscCall(PetscBTDestroy(&bt));
35949566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
35959566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
35969566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
3597b51a4376SLisandro Dalcin   }
3598b51a4376SLisandro Dalcin 
3599b51a4376SLisandro Dalcin   /* store matrix values */
3600b51a4376SLisandro Dalcin   for (i = 0; i < m; i++) {
3601b51a4376SLisandro Dalcin     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
36029566063dSJacob Faibussowitsch     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3603b51a4376SLisandro Dalcin   }
3604b51a4376SLisandro Dalcin 
36059566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
36069566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs, matvals));
36079566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
36089566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3609b51a4376SLisandro Dalcin   PetscFunctionReturn(0);
3610b51a4376SLisandro Dalcin }
3611b51a4376SLisandro Dalcin 
36129371c9d4SSatish Balay PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) {
36137f489da9SVaclav Hapla   PetscBool isbinary;
3614f501eaabSShri Abhyankar 
3615f501eaabSShri Abhyankar   PetscFunctionBegin;
36169566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
36175f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
36189566063dSJacob Faibussowitsch   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3619f501eaabSShri Abhyankar   PetscFunctionReturn(0);
3620f501eaabSShri Abhyankar }
3621f501eaabSShri Abhyankar 
3622273d9f13SBarry Smith /*@C
362311a5261eSBarry Smith    MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3624273d9f13SBarry Smith    compressed row) format.  For good matrix assembly performance the
3625273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3626273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3627273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
36282593348eSBarry Smith 
3629d083f849SBarry Smith    Collective
3630273d9f13SBarry Smith 
3631273d9f13SBarry Smith    Input Parameters:
363211a5261eSBarry Smith +  comm - MPI communicator, set to `PETSC_COMM_SELF`
363311a5261eSBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
363411a5261eSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3635273d9f13SBarry Smith .  m - number of rows
3636273d9f13SBarry Smith .  n - number of columns
363735d8aa7fSBarry Smith .  nz - number of nonzero blocks  per block row (same for all rows)
363835d8aa7fSBarry Smith -  nnz - array containing the number of nonzero blocks in the various block rows
36390298fd71SBarry Smith          (possibly different for each block row) or NULL
3640273d9f13SBarry Smith 
3641273d9f13SBarry Smith    Output Parameter:
3642273d9f13SBarry Smith .  A - the matrix
3643273d9f13SBarry Smith 
364411a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3645f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
364611a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3647175b88e8SBarry Smith 
3648273d9f13SBarry Smith    Options Database Keys:
364911a5261eSBarry Smith +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3650a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
3651273d9f13SBarry Smith 
3652273d9f13SBarry Smith    Level: intermediate
3653273d9f13SBarry Smith 
3654273d9f13SBarry Smith    Notes:
3655d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
3656d1be2dadSMatthew Knepley 
365749a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
365849a6f317SBarry Smith 
365935d8aa7fSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
366035d8aa7fSBarry Smith 
366111a5261eSBarry Smith    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
3662273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3663273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3664273d9f13SBarry Smith 
3665273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
366611a5261eSBarry Smith    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
3667651615e1SBarry Smith    allocation.  See [Sparse Matrices](sec_matsparse) for details.
3668273d9f13SBarry Smith    matrices.
3669273d9f13SBarry Smith 
3670651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3671273d9f13SBarry Smith @*/
36729371c9d4SSatish Balay PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) {
3673273d9f13SBarry Smith   PetscFunctionBegin;
36749566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
36759566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
36769566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJ));
36779566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3678273d9f13SBarry Smith   PetscFunctionReturn(0);
3679273d9f13SBarry Smith }
3680273d9f13SBarry Smith 
3681273d9f13SBarry Smith /*@C
3682273d9f13SBarry Smith    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3683273d9f13SBarry Smith    per row in the matrix. For good matrix assembly performance the
3684273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3685273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3686273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
3687273d9f13SBarry Smith 
3688d083f849SBarry Smith    Collective
3689273d9f13SBarry Smith 
3690273d9f13SBarry Smith    Input Parameters:
36911c4f3114SJed Brown +  B - the matrix
369211a5261eSBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
369311a5261eSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3694273d9f13SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
3695273d9f13SBarry Smith -  nnz - array containing the number of block nonzeros in the various block rows
36960298fd71SBarry Smith          (possibly different for each block row) or NULL
3697273d9f13SBarry Smith 
3698273d9f13SBarry Smith    Options Database Keys:
369911a5261eSBarry Smith +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3700a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
3701273d9f13SBarry Smith 
3702273d9f13SBarry Smith    Level: intermediate
3703273d9f13SBarry Smith 
3704273d9f13SBarry Smith    Notes:
370549a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
370649a6f317SBarry Smith 
370711a5261eSBarry Smith    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3708aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3709aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3710aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3711aa95bbe8SBarry Smith 
371211a5261eSBarry Smith    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
3713273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3714273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3715273d9f13SBarry Smith 
3716273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
371711a5261eSBarry Smith    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
3718651615e1SBarry Smith    allocation.  See [Sparse Matrices](sec_matsparse) for details.
3719273d9f13SBarry Smith 
3720651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3721273d9f13SBarry Smith @*/
37229371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) {
3723273d9f13SBarry Smith   PetscFunctionBegin;
37246ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
37256ba663aaSJed Brown   PetscValidType(B, 1);
37266ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3727cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3728273d9f13SBarry Smith   PetscFunctionReturn(0);
3729273d9f13SBarry Smith }
3730a1d92eedSBarry Smith 
3731725b52f3SLisandro Dalcin /*@C
373211a5261eSBarry Smith    MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3733725b52f3SLisandro Dalcin 
3734d083f849SBarry Smith    Collective
3735725b52f3SLisandro Dalcin 
3736725b52f3SLisandro Dalcin    Input Parameters:
37371c4f3114SJed Brown +  B - the matrix
3738725b52f3SLisandro Dalcin .  i - the indices into j for the start of each local row (starts with zero)
3739725b52f3SLisandro Dalcin .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3740725b52f3SLisandro Dalcin -  v - optional values in the matrix
3741725b52f3SLisandro Dalcin 
3742664954b6SBarry Smith    Level: advanced
3743725b52f3SLisandro Dalcin 
37443adadaf3SJed Brown    Notes:
374511a5261eSBarry Smith    The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
374611a5261eSBarry Smith    may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
37473adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
374811a5261eSBarry Smith    `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
37493adadaf3SJed Brown    block column and the second index is over columns within a block.
37503adadaf3SJed Brown 
3751664954b6SBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3752664954b6SBarry Smith 
3753db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3754725b52f3SLisandro Dalcin @*/
37559371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
3756725b52f3SLisandro Dalcin   PetscFunctionBegin;
37576ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
37586ba663aaSJed Brown   PetscValidType(B, 1);
37596ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3760cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3761725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3762725b52f3SLisandro Dalcin }
3763725b52f3SLisandro Dalcin 
3764c75a6043SHong Zhang /*@
376511a5261eSBarry Smith      MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3766c75a6043SHong Zhang 
3767d083f849SBarry Smith      Collective
3768c75a6043SHong Zhang 
3769c75a6043SHong Zhang    Input Parameters:
3770c75a6043SHong Zhang +  comm - must be an MPI communicator of size 1
3771c75a6043SHong Zhang .  bs - size of block
3772c75a6043SHong Zhang .  m - number of rows
3773c75a6043SHong Zhang .  n - number of columns
3774483a2f95SBarry Smith .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3775c75a6043SHong Zhang .  j - column indices
3776c75a6043SHong Zhang -  a - matrix values
3777c75a6043SHong Zhang 
3778c75a6043SHong Zhang    Output Parameter:
3779c75a6043SHong Zhang .  mat - the matrix
3780c75a6043SHong Zhang 
3781dfb205c3SBarry Smith    Level: advanced
3782c75a6043SHong Zhang 
3783c75a6043SHong Zhang    Notes:
3784c75a6043SHong Zhang        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3785c75a6043SHong Zhang     once the matrix is destroyed
3786c75a6043SHong Zhang 
3787c75a6043SHong Zhang        You cannot set new nonzero locations into this matrix, that will generate an error.
3788c75a6043SHong Zhang 
3789c75a6043SHong Zhang        The i and j indices are 0 based
3790c75a6043SHong Zhang 
379111a5261eSBarry Smith        When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3792dfb205c3SBarry Smith 
37933adadaf3SJed Brown       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
37943adadaf3SJed Brown       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
37953adadaf3SJed Brown       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
37963adadaf3SJed Brown       with column-major ordering within blocks.
3797dfb205c3SBarry Smith 
3798db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3799c75a6043SHong Zhang @*/
38009371c9d4SSatish Balay PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) {
3801c75a6043SHong Zhang   PetscInt     ii;
3802c75a6043SHong Zhang   Mat_SeqBAIJ *baij;
3803c75a6043SHong Zhang 
3804c75a6043SHong Zhang   PetscFunctionBegin;
38055f80ce2aSJacob Faibussowitsch   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
38065f80ce2aSJacob Faibussowitsch   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3807c75a6043SHong Zhang 
38089566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
38099566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, m, n));
38109566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATSEQBAIJ));
38119566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3812c75a6043SHong Zhang   baij = (Mat_SeqBAIJ *)(*mat)->data;
38139566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3814c75a6043SHong Zhang 
3815c75a6043SHong Zhang   baij->i = i;
3816c75a6043SHong Zhang   baij->j = j;
3817c75a6043SHong Zhang   baij->a = a;
381826fbe8dcSKarl Rupp 
3819c75a6043SHong Zhang   baij->singlemalloc = PETSC_FALSE;
3820c75a6043SHong Zhang   baij->nonew        = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3821e6b907acSBarry Smith   baij->free_a       = PETSC_FALSE;
3822e6b907acSBarry Smith   baij->free_ij      = PETSC_FALSE;
3823c75a6043SHong Zhang 
3824c75a6043SHong Zhang   for (ii = 0; ii < m; ii++) {
3825c75a6043SHong Zhang     baij->ilen[ii] = baij->imax[ii] = i[ii + 1] - i[ii];
38266bdcaf15SBarry Smith     PetscCheck(i[ii + 1] - i[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, i[ii + 1] - i[ii]);
3827c75a6043SHong Zhang   }
382876bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
3829c75a6043SHong Zhang     for (ii = 0; ii < baij->i[m]; ii++) {
38306bdcaf15SBarry Smith       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
38316bdcaf15SBarry Smith       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3832c75a6043SHong Zhang     }
383376bd3646SJed Brown   }
3834c75a6043SHong Zhang 
38359566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
38369566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3837c75a6043SHong Zhang   PetscFunctionReturn(0);
3838c75a6043SHong Zhang }
3839bdf6f3fcSHong Zhang 
38409371c9d4SSatish Balay PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
3841bdf6f3fcSHong Zhang   PetscFunctionBegin;
38429566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3843bdf6f3fcSHong Zhang   PetscFunctionReturn(0);
3844bdf6f3fcSHong Zhang }
3845